Return-Path: X-Original-To: apmail-geode-commits-archive@minotaur.apache.org Delivered-To: apmail-geode-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 0833B1869A for ; Tue, 17 Nov 2015 20:54:16 +0000 (UTC) Received: (qmail 90867 invoked by uid 500); 17 Nov 2015 20:54:16 -0000 Delivered-To: apmail-geode-commits-archive@geode.apache.org Received: (qmail 90833 invoked by uid 500); 17 Nov 2015 20:54:15 -0000 Mailing-List: contact commits-help@geode.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@geode.incubator.apache.org Delivered-To: mailing list commits@geode.incubator.apache.org Received: (qmail 90824 invoked by uid 99); 17 Nov 2015 20:54:15 -0000 Received: from Unknown (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 17 Nov 2015 20:54:15 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id 775441A2335 for ; Tue, 17 Nov 2015 20:54:15 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 1.77 X-Spam-Level: * X-Spam-Status: No, score=1.77 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from mx1-us-east.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id DXVU0wBn2jEJ for ; Tue, 17 Nov 2015 20:54:12 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-us-east.apache.org (ASF Mail Server at mx1-us-east.apache.org) with SMTP id 444CD42B7B for ; Tue, 17 Nov 2015 20:54:12 +0000 (UTC) Received: (qmail 90417 invoked by uid 99); 17 Nov 2015 20:54:11 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 17 Nov 2015 20:54:11 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id AC7E4E02FF; Tue, 17 Nov 2015 20:54:11 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: bschuchardt@apache.org To: commits@geode.incubator.apache.org Date: Tue, 17 Nov 2015 20:54:11 -0000 Message-Id: <12a799373ce2463aacd048e7f6b8801a@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [01/50] [abbrv] incubator-geode git commit: Adding a microbenchmark of create and query performance. Repository: incubator-geode Updated Branches: refs/heads/feature/GEODE-77 fd98f62f8 -> 766dfd05b Adding a microbenchmark of create and query performance. Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/1f597bb7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/1f597bb7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/1f597bb7 Branch: refs/heads/feature/GEODE-77 Commit: 1f597bb707fb8482a3808e085c86d3c6b9d25fcf Parents: d623cf6 Author: Dan Smith Authored: Fri Oct 16 14:53:58 2015 -0700 Committer: Dan Smith Committed: Fri Oct 16 14:54:50 2015 -0700 ---------------------------------------------------------------------- ...IndexRepositoryImplJUnitPerformanceTest.java | 422 +++++++++++++++++++ 1 file changed, 422 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/1f597bb7/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java ---------------------------------------------------------------------- diff --git a/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java new file mode 100644 index 0000000..8641254 --- /dev/null +++ b/gemfire-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImplJUnitPerformanceTest.java @@ -0,0 +1,422 @@ +package com.gemstone.gemfire.cache.lucene.internal.repository; + +import static org.junit.Assert.*; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.RAMDirectory; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.carrotsearch.randomizedtesting.generators.RandomStrings; +import com.gemstone.gemfire.DataSerializable; +import com.gemstone.gemfire.DataSerializer; +import com.gemstone.gemfire.cache.Cache; +import com.gemstone.gemfire.cache.CacheFactory; +import com.gemstone.gemfire.cache.PartitionAttributesFactory; +import com.gemstone.gemfire.cache.Region; +import com.gemstone.gemfire.cache.RegionShortcut; +import com.gemstone.gemfire.cache.asyncqueue.AsyncEventQueue; +import com.gemstone.gemfire.cache.lucene.LuceneIndex; +import com.gemstone.gemfire.cache.lucene.LuceneQuery; +import com.gemstone.gemfire.cache.lucene.LuceneQueryProvider; +import com.gemstone.gemfire.cache.lucene.LuceneQueryResults; +import com.gemstone.gemfire.cache.lucene.LuceneService; +import com.gemstone.gemfire.cache.lucene.LuceneServiceProvider; +import com.gemstone.gemfire.cache.lucene.internal.LuceneServiceImpl; +import com.gemstone.gemfire.cache.lucene.internal.directory.RegionDirectory; +import com.gemstone.gemfire.cache.lucene.internal.distributed.TopEntriesCollector; +import com.gemstone.gemfire.cache.lucene.internal.filesystem.ChunkKey; +import com.gemstone.gemfire.cache.lucene.internal.filesystem.File; +import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.HeterogenousLuceneSerializer; +import com.gemstone.gemfire.cache.query.QueryException; +import com.gemstone.gemfire.test.junit.categories.PerformanceTest; + +/** + * Microbenchmark of the IndexRepository to compare an + * IndexRepository built on top of cache with a + * stock lucene IndexWriter with a RAMDirectory. + */ +@Category(PerformanceTest.class) +public class IndexRepositoryImplJUnitPerformanceTest { + + private static final int NUM_WORDS = 1000; + private static int[] COMMIT_INTERVAL = new int[] {100, 1000, 5000}; + private static int NUM_ENTRIES = 500_000; + private static int NUM_QUERIES = 500_000; + + private StandardAnalyzer analyzer = new StandardAnalyzer(); + + @Test + public void testIndexRepository() throws Exception { + + + doTest("IndexRepository", new TestCallbacks() { + + private Cache cache; + private IndexRepositoryImpl repo; + private IndexWriter writer; + + @Override + public void addObject(String key, String text) throws Exception { + repo.create(key, new TestObject(text)); + } + + @Override + public void commit() throws Exception { + repo.commit(); + } + + @Override + public void init() throws Exception { + cache = new CacheFactory().set("mcast-port", "0") + .set("log-level", "error") + .create(); + Region fileRegion = cache.createRegionFactory(RegionShortcut.REPLICATE).create("files"); + Region chunkRegion = cache.createRegionFactory(RegionShortcut.REPLICATE).create("chunks"); + + RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion); + + + IndexWriterConfig config = new IndexWriterConfig(analyzer); + writer = new IndexWriter(dir, config); + String[] indexedFields= new String[] {"text"}; + HeterogenousLuceneSerializer mapper = new HeterogenousLuceneSerializer(indexedFields); + repo = new IndexRepositoryImpl(fileRegion, writer, mapper); + } + + @Override + public void cleanup() throws IOException { + writer.close(); + cache.close(); + } + + @Override + public void waitForAsync() throws Exception { + //do nothing + } + + @Override + public int query(Query query) throws IOException { + TopEntriesCollector collector = new TopEntriesCollector(); + repo.query(query, 100, collector); + return collector.size(); + } + }); + } + + /** + * Test our full lucene index implementation + * @throws Exception + */ + @Test + public void testLuceneIndex() throws Exception { + + + doTest("LuceneIndex", new TestCallbacks() { + + private Cache cache; + private Region region; + private LuceneService service; + + @Override + public void addObject(String key, String text) throws Exception { + region.create(key, new TestObject(text)); + } + + @Override + public void commit() throws Exception { + //NA + } + + @Override + public void init() throws Exception { + cache = new CacheFactory().set("mcast-port", "0") + .set("log-level", "warning") + .create(); + service = LuceneServiceProvider.get(cache); + service.createIndex("index", "/region", "text"); + region = cache.createRegionFactory(RegionShortcut.PARTITION) + .setPartitionAttributes(new PartitionAttributesFactory<>().setTotalNumBuckets(1).create()) + .create("region"); + } + + @Override + public void cleanup() throws IOException { + cache.close(); + } + + @Override + public void waitForAsync() throws Exception { + AsyncEventQueue aeq = cache.getAsyncEventQueue(LuceneServiceImpl.getUniqueIndexName("index", "/region")); + + //We will be at most 10 ms off + while(aeq.size() > 0) { + Thread.sleep(10); + } + } + + @Override + public int query(final Query query) throws Exception { + LuceneQuery luceneQuery = service.createLuceneQueryFactory().create("index", "/region", new LuceneQueryProvider() { + + @Override + public Query getQuery(LuceneIndex index) throws QueryException { + return query; + } + }); + + LuceneQueryResults results = luceneQuery.search(); + return results.size(); + } + }); + } + + @Test + public void testLuceneWithRegionDirectory() throws Exception { + doTest("RegionDirectory", new TestCallbacks() { + + private IndexWriter writer; + private SearcherManager searcherManager; + + @Override + public void init() throws Exception { + RegionDirectory dir = new RegionDirectory(new ConcurrentHashMap(), new ConcurrentHashMap()); + IndexWriterConfig config = new IndexWriterConfig(analyzer); + writer = new IndexWriter(dir, config); + searcherManager = new SearcherManager(writer, true, null); + } + + @Override + public void addObject(String key, String text) throws Exception { + Document doc = new Document(); + doc.add(new TextField("key", key, Store.YES)); + doc.add(new TextField("text", text, Store.NO)); + writer.addDocument(doc); + } + + @Override + public void commit() throws Exception { + writer.commit(); + searcherManager.maybeRefresh(); + } + + @Override + public void cleanup() throws Exception { + writer.close(); + } + + @Override + public void waitForAsync() throws Exception { + //do nothing + } + + @Override + public int query(Query query) throws Exception { + IndexSearcher searcher = searcherManager.acquire(); + try { + return searcher.count(query); + } finally { + searcherManager.release(searcher); + } + } + + }); + + } + + @Test + public void testLucene() throws Exception { + doTest("Lucene", new TestCallbacks() { + + private IndexWriter writer; + private SearcherManager searcherManager; + + @Override + public void init() throws Exception { + RAMDirectory dir = new RAMDirectory(); + IndexWriterConfig config = new IndexWriterConfig(analyzer); + writer = new IndexWriter(dir, config); + searcherManager = new SearcherManager(writer, true, null); + } + + @Override + public void addObject(String key, String text) throws Exception { + Document doc = new Document(); + doc.add(new TextField("key", key, Store.YES)); + doc.add(new TextField("text", text, Store.NO)); + writer.addDocument(doc); + } + + @Override + public void commit() throws Exception { + writer.commit(); + searcherManager.maybeRefresh(); + } + + @Override + public void cleanup() throws Exception { + writer.close(); + } + + @Override + public void waitForAsync() throws Exception { + //do nothing + } + + @Override + public int query(Query query) throws Exception { + IndexSearcher searcher = searcherManager.acquire(); + try { + return searcher.count(query); + } finally { + searcherManager.release(searcher); + } + } + + }); + + } + + private void doTest(String testName, TestCallbacks callbacks) throws Exception { + + //Create some random words. We need to be careful + //to make sure we get NUM_WORDS distinct words here + Set wordSet = new HashSet(); + Random rand = new Random(); + while(wordSet.size() < NUM_WORDS) { + int length = rand.nextInt(12) + 3; + char[] text = new char[length]; + for(int i = 0; i < length; i++) { + text[i] = (char) (rand.nextInt(26) + 97); + } + wordSet.add(new String(text)); + } + List words = new ArrayList(wordSet.size()); + words.addAll(wordSet); + + + + //warm up + writeRandomWords(callbacks, words, rand, NUM_ENTRIES / 10, NUM_QUERIES / 10, COMMIT_INTERVAL[0]); + + //Do the actual test + + for(int i = 0; i < COMMIT_INTERVAL.length; i++) { + Results results = writeRandomWords(callbacks, words, rand, NUM_ENTRIES, NUM_QUERIES / 10, COMMIT_INTERVAL[i]); + + System.out.println(testName + " writes(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.writeTime)); + System.out.println(testName + " queries(entries=" + NUM_ENTRIES + ", commit=" + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.queryTime)); + } + } + + private Results writeRandomWords(TestCallbacks callbacks, List words, + Random rand, int numEntries, int numQueries, int commitInterval) throws Exception { + Results results = new Results(); + callbacks.init(); + int[] counts = new int[words.size()]; + long start = System.nanoTime(); + try { + for(int i =0; i < numEntries; i++) { + int word1 = rand.nextInt(words.size()); + int word2 = rand.nextInt(words.size()); + counts[word1]++; + counts[word2]++; + String value = words.get(word1) + " " + words.get(word2); + callbacks.addObject("key" + i, value); + + if(i % commitInterval == 0 && i != 0) { + callbacks.commit(); + } + } + callbacks.commit(); + callbacks.waitForAsync(); + long end = System.nanoTime(); + results.writeTime = end - start; + + + start = System.nanoTime(); + for(int i=0; i < numQueries; i++) { + int wordIndex = rand.nextInt(words.size()); + String word = words.get(wordIndex); + Query query = new TermQuery(new Term("text", word)); + int size = callbacks.query(query); +// int size = callbacks.query(parser.parse(word)); + //All of my tests sometimes seem to be missing a couple of words, including the stock lucene +// assertEquals("Error on query " + i + " word=" + word, counts[wordIndex], size); + } + end = System.nanoTime(); + results.queryTime = end - start; + + return results; + } finally { + callbacks.cleanup(); + } + } + + private static class TestObject implements DataSerializable { + private String text; + + public TestObject() { + + } + + public TestObject(String text) { + super(); + this.text = text; + } + + @Override + public void toData(DataOutput out) throws IOException { + DataSerializer.writeString(text, out); + } + + @Override + public void fromData(DataInput in) + throws IOException, ClassNotFoundException { + text = DataSerializer.readString(in); + } + + @Override + public String toString() { + return text; + } + + + } + + private interface TestCallbacks { + public void init() throws Exception; + public int query(Query query) throws Exception; + public void addObject(String key, String text) throws Exception; + public void commit() throws Exception; + public void waitForAsync() throws Exception; + public void cleanup() throws Exception; + } + + private static class Results { + long writeTime; + long queryTime; + } +}