lucene-commits mailing list archives

From romseyg...@apache.org
Subject svn commit: r1412849 [7/13] - in /lucene/dev/branches/LUCENE-2878: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/ dev-tools/idea/solr/contrib/dataimporthandler/ dev-tools/maven/ dev-tools/maven/solr/contrib/dataimporthandler/ dev-tool...
Date Fri, 23 Nov 2012 12:01:26 GMT
Modified: lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java Fri Nov 23 12:00:32 2012
@@ -5,18 +5,17 @@ import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.RAMDirectory;
-import org.junit.Ignore;
-import org.junit.Test;
-
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.SlowRAMDirectory;
+import org.junit.Test;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -35,6 +34,8 @@ import org.apache.lucene.util.SlowRAMDir
  * limitations under the License.
  */
 
+// TODO: remove this suppress after we fix the TaxoWriter Codec to a non-default (see todo in DirTW)
+@SuppressCodecs("SimpleText")
 public class TestTaxonomyCombined extends LuceneTestCase {
 
   /**  The following categories will be added to the taxonomy by
@@ -725,7 +726,10 @@ public class TestTaxonomyCombined extend
     assertEquals(3, ca.getOlderSiblingArray().length);
     assertEquals(3, ca.getYoungestChildArray().length);
     // After the refresh, things change:
-    tr.refresh();
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newtr);
+    tr.close();
+    tr = newtr;
     ca = tr.getChildrenArrays();
     assertEquals(5, tr.getSize());
     assertEquals(5, ca.getOlderSiblingArray().length);
@@ -737,14 +741,11 @@ public class TestTaxonomyCombined extend
     indexDir.close();
   }
   
-  /**
-   * Test that getParentArrays is valid when retrieved during refresh
-   */
+  // Test that getParentArrays is valid when retrieved during refresh
   @Test
-  @Ignore
   public void testTaxonomyReaderRefreshRaces() throws Exception {
     // compute base child arrays - after first chunk, and after the other
-    Directory indexDirBase =  newDirectory();
+    Directory indexDirBase = newDirectory();
     TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
     twBase.addCategory(new CategoryPath("a", "0"));
     final CategoryPath abPath = new CategoryPath("a", "b");
@@ -757,56 +758,64 @@ public class TestTaxonomyCombined extend
     final int abOrd = trBase.getOrdinal(abPath);
     final int abYoungChildBase1 = ca1.getYoungestChildArray()[abOrd]; 
     
-    for (int i=0; i < 1<<10; i++) { //1024 facets
+    final int numCategories = atLeast(800);
+    for (int i = 0; i < numCategories; i++) {
       twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i)));
     }
-    twBase.commit();
+    twBase.close();
     
-    trBase.refresh();
+    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase);
+    assertNotNull(newTaxoReader);
+    trBase.close();
+    trBase = newTaxoReader;
     
     final ChildrenArrays ca2 = trBase.getChildrenArrays();
     final int abYoungChildBase2 = ca2.getYoungestChildArray()[abOrd];
     
-    for (int retry=0; retry<100; retry++) {
-      assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1,  abYoungChildBase2, retry);
+    int numRetries = atLeast(50);
+    for (int retry = 0; retry < numRetries; retry++) {
+      assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories);
     }
+    
+    trBase.close();
     indexDirBase.close();
   }
 
   private void assertConsistentYoungestChild(final CategoryPath abPath,
-      final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry)
+      final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry, int numCategories)
       throws Exception {
-    SlowRAMDirectory indexDir =  new SlowRAMDirectory(-1,null); // no slowness for intialization
+    SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.addCategory(new CategoryPath("a", "0"));
     tw.addCategory(abPath);
     tw.commit();
     
-    final TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
-    for (int i=0; i < 1<<10; i++) { //1024 facets
+    final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
+    for (int i = 0; i < numCategories; i++) {
       final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
       tw.addCategory(cp);
       assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
     }
-    tw.commit();
+    tw.close();
     
-    final boolean[] stop = new boolean[] { false };
+    final AtomicBoolean stop = new AtomicBoolean(false);
     final Throwable[] error = new Throwable[] { null };
     final int retrieval[] = { 0 }; 
     
     Thread thread = new Thread("Child Arrays Verifier") {
       @Override
       public void run() {
-        setPriority(1+getPriority());
+        setPriority(1 + getPriority());
         try {
-          while (!stop[0]) {
-            int lastOrd = tr.getParentArray().length-1;
-            assertNotNull("path of last-ord "+lastOrd+" is not found!",tr.getPath(lastOrd));
-            assertChildrenArrays(tr.getChildrenArrays(),retry,retrieval[0]++);
+          while (!stop.get()) {
+            int lastOrd = tr.getParentArray().length - 1;
+            assertNotNull("path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
+            assertChildrenArrays(tr.getChildrenArrays(), retry, retrieval[0]++);
+            sleep(10); // don't starve openIfChanged()'s CPU, which sleeps every 50 bytes for 1 ms
           }
         } catch (Throwable e) {
           error[0] = e;
-          stop[0] = true;
+          stop.set(true);
         }
       }
 
@@ -822,13 +831,15 @@ public class TestTaxonomyCombined extend
     thread.start();
     
     indexDir.setSleepMillis(1); // some delay for refresh
-    tr.refresh();
+    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
+    if (newTaxoReader != null) {
+      newTaxoReader.close();
+    }
     
-    stop[0] = true;
+    stop.set(true);
     thread.join();
     assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
     
-    tw.close();
     tr.close();
   }
 
@@ -885,7 +896,7 @@ public class TestTaxonomyCombined extend
       // ok
     }
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
+    assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
     try {
       tr.getParent(author);
       fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
@@ -901,7 +912,11 @@ public class TestTaxonomyCombined extend
       // ok
     }
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh();
+    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newTaxoReader);
+    tr.close();
+    tr = newTaxoReader;
+    
     try {
       assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
       // ok
@@ -917,7 +932,10 @@ public class TestTaxonomyCombined extend
     tw.addCategory(new CategoryPath("Author", "Richard Dawkins"));
     int dawkins = 2;
     tw.commit();
-    tr.refresh();
+    newTaxoReader = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newTaxoReader);
+    tr.close();
+    tr = newTaxoReader;
     assertEquals(author, tr.getParent(dawkins));
     assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(TaxonomyReader.ROOT_ORDINAL));
@@ -943,16 +961,19 @@ public class TestTaxonomyCombined extend
     // before commit and refresh, no change:
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
+    assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
     assertEquals(1, tr.getSize()); // still root only...
     tw.commit();
     // still not enough before refresh:
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh(); // finally
+    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newTaxoReader);
+    tr.close();
+    tr = newTaxoReader;
     assertEquals(1, tr.getOrdinal(author));
-    assertEquals(2, tr.getSize()); // still root only...
+    assertEquals(2, tr.getSize());
     tw.close();
     tr.close();
     indexDir.close();
@@ -977,7 +998,7 @@ public class TestTaxonomyCombined extend
     // Try to open a second writer, with the first one locking the directory.
     // We expect to get a LockObtainFailedException.
     try {
-      new DirectoryTaxonomyWriter(indexDir);
+      assertNull(new DirectoryTaxonomyWriter(indexDir));
       fail("should have failed to write in locked directory");
     } catch (LockObtainFailedException e) {
       // this is what we expect to happen.
@@ -989,7 +1010,10 @@ public class TestTaxonomyCombined extend
     tw2.addCategory(new CategoryPath("hey"));
     tw2.close();
     // See that the writer indeed wrote:
-    tr.refresh();
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newtr);
+    tr.close();
+    tr = newtr;
     assertEquals(3, tr.getOrdinal(new CategoryPath("hey")));
     tr.close();
     tw.close();
@@ -1086,6 +1110,27 @@ public class TestTaxonomyCombined extend
     indexDir.close();
   }
 
+  @Test
+  public void testNRT() throws Exception {
+    Directory dir = newDirectory();
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+    
+    CategoryPath cp = new CategoryPath("a");
+    writer.addCategory(cp);
+    TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
+    assertNotNull("expected a new instance", newReader);
+    assertEquals(2, newReader.getSize());
+    assertNotSame(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(cp));
+    reader.close();
+    reader = newReader;
+    
+    writer.close();
+    reader.close();
+    
+    dir.close();
+  }
+
 //  TODO (Facet): test multiple readers, one writer. Have the multiple readers
 //  using the same object (simulating threads) or different objects
 //  (simulating processes).

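The change repeated throughout this file replaces the removed TaxonomyReader.refresh() with the static TaxonomyReader.openIfChanged(reader): it returns a new reader instance, or null if nothing has changed, and never closes the reader it was given, so the caller closes the old instance and switches to the new one. A minimal sketch of that reopen idiom, separate from the committed tests:

import java.io.IOException;

import org.apache.lucene.facet.taxonomy.TaxonomyReader;

// Sketch only (not part of the commit): the reopen idiom used throughout these tests.
class TaxonomyReopenExample {
  static TaxonomyReader reopenIfChanged(TaxonomyReader reader) throws IOException {
    TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
    if (newReader == null) {
      return reader;        // null means nothing changed since 'reader' was opened
    }
    reader.close();         // the caller owns the old reader; openIfChanged does not close it
    return newReader;
  }
}
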
Modified: lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java Fri Nov 23 12:00:32 2012
@@ -1,14 +1,17 @@
 package org.apache.lucene.facet.taxonomy.directory;
 
+import java.io.IOException;
 import java.util.Random;
 
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
@@ -67,11 +70,8 @@ public class TestDirectoryTaxonomyReader
     dir.close();
   }
   
-  /**
-   * Test the boolean returned by TR.refresh
-   */
   @Test
-  public void testReaderRefreshResult() throws Exception {
+  public void testOpenIfChangedResult() throws Exception {
     Directory dir = null;
     DirectoryTaxonomyWriter ltw = null;
     DirectoryTaxonomyReader ltr = null;
@@ -84,13 +84,15 @@ public class TestDirectoryTaxonomyReader
       ltw.commit();
       
       ltr = new DirectoryTaxonomyReader(dir);
-      assertFalse("Nothing has changed",ltr.refresh());
+      assertNull("Nothing has changed", TaxonomyReader.openIfChanged(ltr));
       
       ltw.addCategory(new CategoryPath("b"));
       ltw.commit();
       
-      assertTrue("changes were committed",ltr.refresh());
-      assertFalse("Nothing has changed",ltr.refresh());
+      DirectoryTaxonomyReader newtr = TaxonomyReader.openIfChanged(ltr);
+      assertNotNull("changes were committed", newtr);
+      assertNull("Nothing has changed", TaxonomyReader.openIfChanged(newtr));
+      newtr.close();
     } finally {
       IOUtils.close(ltw, ltr, dir);
     }
@@ -119,18 +121,15 @@ public class TestDirectoryTaxonomyReader
    */
   @Test
   public void testFreshReadRecreatedTaxonomy() throws Exception {
-    doTestReadRecreatedTaxono(random(), true);
+    doTestReadRecreatedTaxonomy(random(), true);
   }
   
-  /**
-   * recreating a taxonomy should work well with a refreshed taxonomy reader 
-   */
   @Test
-  public void testRefreshReadRecreatedTaxonomy() throws Exception {
-    doTestReadRecreatedTaxono(random(), false);
+  public void testOpenIfChangedReadRecreatedTaxonomy() throws Exception {
+    doTestReadRecreatedTaxonomy(random(), false);
   }
   
-  private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception {
+  private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
     Directory dir = null;
     TaxonomyWriter tw = null;
     TaxonomyReader tr = null;
@@ -163,13 +162,10 @@ public class TestDirectoryTaxonomyReader
           tr.close();
           tr = new DirectoryTaxonomyReader(dir);
         } else {
-          try {
-            tr.refresh();
-            fail("Expected InconsistentTaxonomyException");
-          } catch (InconsistentTaxonomyException e) {
-            tr.close();
-            tr = new DirectoryTaxonomyReader(dir);
-          }
+          TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
+          assertNotNull(newtr);
+          tr.close();
+          tr = newtr;
         }
         assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumCategories + 1 + k, tr.getSize());
       }
@@ -179,14 +175,14 @@ public class TestDirectoryTaxonomyReader
   }
   
   @Test
-  public void testRefreshAndRefCount() throws Exception {
+  public void testOpenIfChangedAndRefCount() throws Exception {
     Directory dir = new RAMDirectory(); // no need for random directories here
 
     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
     taxoWriter.addCategory(new CategoryPath("a"));
     taxoWriter.commit();
 
-    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
     assertEquals("wrong refCount", 1, taxoReader.getRefCount());
 
     taxoReader.incRef();
@@ -194,12 +190,276 @@ public class TestDirectoryTaxonomyReader
 
     taxoWriter.addCategory(new CategoryPath("a", "b"));
     taxoWriter.commit();
-    taxoReader.refresh();
-    assertEquals("wrong refCount", 2, taxoReader.getRefCount());
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
+    assertNotNull(newtr);
+    taxoReader.close();
+    taxoReader = newtr;
+    assertEquals("wrong refCount", 1, taxoReader.getRefCount());
 
     taxoWriter.close();
     taxoReader.close();
     dir.close();
   }
 
+  @Test
+  public void testOpenIfChangedManySegments() throws Exception {
+    // test openIfChanged() when the taxonomy contains many segments
+    Directory dir = newDirectory();
+    
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
+      @Override
+      protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
+        IndexWriterConfig conf = super.createIndexWriterConfig(openMode);
+        LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
+        lmp.setMergeFactor(2);
+        return conf;
+      }
+    };
+    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+    
+    int numRounds = random().nextInt(10) + 10;
+    int numCategories = 1; // one for root
+    for (int i = 0; i < numRounds; i++) {
+      int numCats = random().nextInt(4) + 1;
+      for (int j = 0; j < numCats; j++) {
+        writer.addCategory(new CategoryPath(Integer.toString(i), Integer.toString(j)));
+      }
+      numCategories += numCats + 1 /* one for round-parent */;
+      TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
+      assertNotNull(newtr);
+      reader.close();
+      reader = newtr;
+      
+      // assert categories
+      assertEquals(numCategories, reader.getSize());
+      int roundOrdinal = reader.getOrdinal(new CategoryPath(Integer.toString(i)));
+      int[] parents = reader.getParentArray();
+      assertEquals(0, parents[roundOrdinal]); // round's parent is root
+      for (int j = 0; j < numCats; j++) {
+        int ord = reader.getOrdinal(new CategoryPath(Integer.toString(i), Integer.toString(j)));
+        assertEquals(roundOrdinal, parents[ord]); // each category's parent is its round
+      }
+    }
+    
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
+  @Test
+  public void testOpenIfChangedMergedSegment() throws Exception {
+    // test openIfChanged() when all index segments were merged - used to be
+    // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
+    // this test is not random
+    Directory dir = newDirectory();
+    
+    // hold onto IW to forceMerge
+    // note how we don't close it, since DTW will close it.
+    final IndexWriter iw = new IndexWriter(dir,
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer())
+            .setMergePolicy(new LogByteSizeMergePolicy()));
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
+      @Override
+      protected IndexWriter openIndexWriter(Directory directory,
+          IndexWriterConfig config) throws IOException {
+        return iw;
+      }
+    };
+    
+    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+    assertEquals(1, reader.getSize());
+    assertEquals(1, reader.getParentArray().length);
+
+    // add category and call forceMerge -- this should flush IW and merge segments down to 1
+    // in ParentArray.initFromReader, this used to fail assuming there are no parents.
+    writer.addCategory(new CategoryPath("1"));
+    iw.forceMerge(1);
+    
+    // now calling openIfChanged should trip on the bug
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
+    assertNotNull(newtr);
+    reader.close();
+    reader = newtr;
+    assertEquals(2, reader.getSize());
+    assertEquals(2, reader.getParentArray().length);
+    
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+  
+  @Test
+  public void testOpenIfChangedNoChangesButSegmentMerges() throws Exception {
+    // test openIfChanged() when the taxonomy hasn't really changed, but segments
+    // were merged. The NRT reader will be reopened, and ParentArray used to assert
+    // that the new reader contains more ordinals than were given from the old
+    // TaxReader version
+    Directory dir = newDirectory();
+    
+    // hold onto IW to forceMerge
+    // note how we don't close it, since DTW will close it.
+    final IndexWriter iw = new IndexWriter(dir,
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer())
+            .setMergePolicy(new LogByteSizeMergePolicy()));
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
+      @Override
+      protected IndexWriter openIndexWriter(Directory directory,
+          IndexWriterConfig config) throws IOException {
+        return iw;
+      }
+    };
+    
+    // add a category so that the following DTR open will cause a flush and 
+    // a new segment will be created
+    writer.addCategory(new CategoryPath("a"));
+    
+    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+    assertEquals(2, reader.getSize());
+    assertEquals(2, reader.getParentArray().length);
+
+    // merge all the segments so that NRT reader thinks there's a change 
+    iw.forceMerge(1);
+    
+    // now calling openIfChanged should trip on the wrong assert in ParentArray's ctor
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
+    assertNotNull(newtr);
+    reader.close();
+    reader = newtr;
+    assertEquals(2, reader.getSize());
+    assertEquals(2, reader.getParentArray().length);
+    
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+ 
+  @Test
+  public void testOpenIfChangedReuseAfterRecreate() throws Exception {
+    // tests that if the taxonomy is recreated, no data is reused from the previous taxonomy
+    Directory dir = newDirectory();
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+    CategoryPath cp_a = new CategoryPath("a");
+    writer.addCategory(cp_a);
+    writer.close();
+    
+    DirectoryTaxonomyReader r1 = new DirectoryTaxonomyReader(dir);
+    // fill r1's caches
+    assertEquals(1, r1.getOrdinal(cp_a));
+    assertEquals(cp_a, r1.getPath(1));
+    
+    // now recreate, add a different category
+    writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
+    CategoryPath cp_b = new CategoryPath("b");
+    writer.addCategory(cp_b);
+    writer.close();
+    
+    DirectoryTaxonomyReader r2 = TaxonomyReader.openIfChanged(r1);
+    assertNotNull(r2);
+    
+    // fill r2's caches
+    assertEquals(1, r2.getOrdinal(cp_b));
+    assertEquals(cp_b, r2.getPath(1));
+    
+    // check that r1 doesn't see cp_b
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
+    assertEquals(cp_a, r1.getPath(1));
+
+    // check that r2 doesn't see cp_a
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
+    assertEquals(cp_b, r2.getPath(1));
+
+    r2.close();
+    r1.close();
+    dir.close();
+  }
+  
+  @Test
+  public void testOpenIfChangedReuse() throws Exception {
+    // test the reuse of data from the old DTR instance
+    for (boolean nrt : new boolean[] {false, true}) {
+      Directory dir = newDirectory();
+      DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+      
+      CategoryPath cp_a = new CategoryPath("a");
+      writer.addCategory(cp_a);
+      if (!nrt) writer.commit();
+      
+      DirectoryTaxonomyReader r1 = nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
+      // fill r1's caches
+      assertEquals(1, r1.getOrdinal(cp_a));
+      assertEquals(cp_a, r1.getPath(1));
+      
+      CategoryPath cp_b = new CategoryPath("b");
+      writer.addCategory(cp_b);
+      if (!nrt) writer.commit();
+      
+      DirectoryTaxonomyReader r2 = TaxonomyReader.openIfChanged(r1);
+      assertNotNull(r2);
+      
+      // add r2's categories to the caches
+      assertEquals(2, r2.getOrdinal(cp_b));
+      assertEquals(cp_b, r2.getPath(2));
+      
+      // check that r1 doesn't see cp_b
+      assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
+      assertNull(r1.getPath(2));
+      
+      r1.close();
+      r2.close();
+      writer.close();
+      dir.close();
+    }
+  }
+  
+  @Test
+  public void testOpenIfChangedReplaceTaxonomy() throws Exception {
+    // test openIfChanged when replaceTaxonomy is called, which is equivalent to recreate,
+    // except that it can work with NRT as well
+    Directory src = newDirectory();
+    DirectoryTaxonomyWriter w = new DirectoryTaxonomyWriter(src);
+    CategoryPath cp_b = new CategoryPath("b");
+    w.addCategory(cp_b);
+    w.close();
+    
+    for (boolean nrt : new boolean[] {false, true}) {
+      Directory dir = newDirectory();
+      DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+      
+      CategoryPath cp_a = new CategoryPath("a");
+      writer.addCategory(cp_a);
+      if (!nrt) writer.commit();
+      
+      DirectoryTaxonomyReader r1 = nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
+      // fill r1's caches
+      assertEquals(1, r1.getOrdinal(cp_a));
+      assertEquals(cp_a, r1.getPath(1));
+
+      // now replace taxonomy
+      writer.replaceTaxonomy(src);
+      if (!nrt) writer.commit();
+      
+      DirectoryTaxonomyReader r2 = TaxonomyReader.openIfChanged(r1);
+      assertNotNull(r2);
+      
+      // fill r2's caches
+      assertEquals(1, r2.getOrdinal(cp_b));
+      assertEquals(cp_b, r2.getPath(1));
+      
+      // check that r1 doesn't see cp_b
+      assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
+      assertEquals(cp_a, r1.getPath(1));
+
+      // check that r2 doesn't see cp_a
+      assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
+      assertEquals(cp_b, r2.getPath(1));
+
+      r2.close();
+      r1.close();
+      writer.close();
+      dir.close();
+    }
+    
+    src.close();
+  }
+  
 }

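Several of the new tests above (testNRT, testOpenIfChangedReuse, testOpenIfChangedReplaceTaxonomy) open the reader directly on the writer, which is the near-real-time path: openIfChanged then sees categories that were added but not yet committed. A hedged usage sketch of that NRT flow, modeled on testNRT:

import java.io.IOException;

import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

// Sketch only (not part of the commit): NRT taxonomy reader opened on the writer.
class NrtTaxonomyExample {
  static void run() throws IOException {
    Directory dir = new RAMDirectory();
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
    TaxonomyReader reader = new DirectoryTaxonomyReader(writer); // opened on the writer, not the Directory
    writer.addCategory(new CategoryPath("a"));                   // not committed yet
    TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
    if (newReader != null) {  // non-null here: the NRT reader sees the uncommitted category
      reader.close();
      reader = newReader;
    }
    reader.close();
    writer.close();
    dir.close();
  }
}
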
Modified: lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Fri Nov 23 12:00:32 2012
@@ -8,7 +8,7 @@ import java.util.concurrent.ConcurrentHa
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
@@ -96,10 +96,10 @@ public class TestDirectoryTaxonomyWriter
     Map <String, String> readUserCommitData = r.getIndexCommit().getUserData();
     assertTrue("wrong value extracted from commit data", 
         "1 2 3".equals(readUserCommitData.get("testing")));
-    assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+    assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
     r.close();
     
-    // open DirTaxoWriter again and commit, INDEX_CREATE_TIME should still exist
+    // open DirTaxoWriter again and commit, INDEX_EPOCH should still exist
     // in the commit data, otherwise DirTaxoReader.refresh() might not detect
     // that the taxonomy index has been recreated.
     taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
@@ -111,7 +111,7 @@ public class TestDirectoryTaxonomyWriter
     
     r = DirectoryReader.open(dir);
     readUserCommitData = r.getIndexCommit().getUserData();
-    assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
+    assertNotNull(DirectoryTaxonomyWriter.INDEX_EPOCH + " not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_EPOCH));
     r.close();
     
     dir.close();
@@ -119,7 +119,7 @@ public class TestDirectoryTaxonomyWriter
   
   @Test
   public void testRollback() throws Exception {
-    // Verifies that if callback is called, DTW is closed.
+    // Verifies that if rollback is called, DTW is closed.
     Directory dir = newDirectory();
     DirectoryTaxonomyWriter dtw = new DirectoryTaxonomyWriter(dir);
     dtw.addCategory(new CategoryPath("a"));
@@ -130,6 +130,19 @@ public class TestDirectoryTaxonomyWriter
     } catch (AlreadyClosedException e) {
       // expected
     }
+    
+    dir.close();
+  }
+  
+  @Test
+  public void testRecreateRollback() throws Exception {
+    // Tests rollback with OpenMode.CREATE
+    Directory dir = newDirectory();
+    new DirectoryTaxonomyWriter(dir).close();
+    assertEquals(1, getEpoch(dir));
+    new DirectoryTaxonomyWriter(dir, OpenMode.CREATE).rollback();
+    assertEquals(1, getEpoch(dir));
+    
     dir.close();
   }
   
@@ -157,7 +170,7 @@ public class TestDirectoryTaxonomyWriter
   
   @Test
   public void testRecreateAndRefresh() throws Exception {
-    // DirTaxoWriter lost the INDEX_CREATE_TIME property if it was opened in
+    // DirTaxoWriter lost the INDEX_EPOCH property if it was opened in
     // CREATE_OR_APPEND (or commit(userData) called twice), which could lead to
     // DirTaxoReader succeeding to refresh().
     Directory dir = newDirectory();
@@ -165,14 +178,16 @@ public class TestDirectoryTaxonomyWriter
     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
     touchTaxo(taxoWriter, new CategoryPath("a"));
     
-    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
 
     touchTaxo(taxoWriter, new CategoryPath("b"));
     
-    // this should not fail
-    taxoReader.refresh();
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
+    taxoReader.close();
+    taxoReader = newtr;
+    assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
 
-    // now recreate the taxonomy, and check that the timestamp is preserved after opening DirTW again.
+    // now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.
     taxoWriter.close();
     taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
     touchTaxo(taxoWriter, new CategoryPath("c"));
@@ -182,33 +197,30 @@ public class TestDirectoryTaxonomyWriter
     touchTaxo(taxoWriter, new CategoryPath("d"));
     taxoWriter.close();
 
-    // this should fail
-    try {
-      taxoReader.refresh();
-      fail("IconsistentTaxonomyException should have been thrown");
-    } catch (InconsistentTaxonomyException e) {
-      // ok, expected
-    }
-    
+    newtr = TaxonomyReader.openIfChanged(taxoReader);
+    taxoReader.close();
+    taxoReader = newtr;
+    assertEquals(2, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
+
     taxoReader.close();
     dir.close();
   }
 
   @Test
-  public void testUndefinedCreateTime() throws Exception {
-    // tests that if the taxonomy index doesn't have the INDEX_CREATE_TIME
+  public void testBackwardsCompatibility() throws Exception {
+    // tests that if the taxonomy index doesn't have the INDEX_EPOCH
     // property (supports pre-3.6 indexes), all still works.
     Directory dir = newDirectory();
     
-    // create an empty index first, so that DirTaxoWriter initializes createTime to null.
+    // create an empty index first, so that DirTaxoWriter initializes indexEpoch to 1.
     new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close();
     
     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
-    // we cannot commit null keys/values, this ensures that if DirTW.createTime is null, we can still commit.
     taxoWriter.close();
     
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
-    taxoReader.refresh();
+    assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
+    assertNull(TaxonomyReader.openIfChanged(taxoReader));
     taxoReader.close();
     
     dir.close();
@@ -267,10 +279,10 @@ public class TestDirectoryTaxonomyWriter
     dir.close();
   }
 
-  private String getCreateTime(Directory taxoDir) throws IOException {
+  private long getEpoch(Directory taxoDir) throws IOException {
     SegmentInfos infos = new SegmentInfos();
     infos.read(taxoDir);
-    return infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+    return Long.parseLong(infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH));
   }
   
   @Test
@@ -286,7 +298,7 @@ public class TestDirectoryTaxonomyWriter
     taxoWriter.addCategory(new CategoryPath("c"));
     taxoWriter.commit();
     
-    String origCreateTime = getCreateTime(dir);
+    long origEpoch = getEpoch(dir);
     
     // replace the taxonomy with the input one
     taxoWriter.replaceTaxonomy(input);
@@ -298,8 +310,8 @@ public class TestDirectoryTaxonomyWriter
     
     taxoWriter.close();
     
-    String newCreateTime = getCreateTime(dir);
-    assertNotSame("create time should have been changed after replaceTaxonomy", origCreateTime, newCreateTime);
+    long newEpoch = getEpoch(dir);
+    assertTrue("index epoch should have been updated after replaceTaxonomy", origEpoch < newEpoch);
     
     dir.close();
     input.close();

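The writer-side change in this file replaces the old INDEX_CREATE_TIME commit property with a numeric INDEX_EPOCH that is bumped whenever the taxonomy is recreated or replaced; the tests read it back either through TaxonomyReader.getCommitUserData() or straight from the latest commit, as in the getEpoch() helper. A small sketch of that helper-style read, assuming a taxonomy Directory that already has at least one commit:

import java.io.IOException;

import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;

// Sketch only (not part of the commit), mirroring the test's getEpoch() helper.
class TaxonomyEpochExample {
  static long readIndexEpoch(Directory taxoDir) throws IOException {
    SegmentInfos infos = new SegmentInfos();
    infos.read(taxoDir);
    return Long.parseLong(infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH));
  }
}
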
Modified: lucene/dev/branches/LUCENE-2878/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java Fri Nov 23 12:00:32 2012
@@ -111,7 +111,7 @@ public class BlockGroupingCollector exte
     }
     
     @Override
-    public float freq() {
+    public int freq() {
       throw new UnsupportedOperationException(); // TODO: wtf does this class do?
     }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java Fri Nov 23 12:00:32 2012
@@ -224,7 +224,7 @@ class TermsIncludingScoreQuery extends Q
     }
 
     @Override
-    public float freq() {
+    public int freq() {
       return 1;
     }
   }
@@ -323,7 +323,7 @@ class TermsIncludingScoreQuery extends Q
       return scores[currentDoc];
     }
 
-    public float freq() throws IOException {
+    public int freq() throws IOException {
       return 1;
     }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Fri Nov 23 12:00:32 2012
@@ -170,7 +170,7 @@ public class ToChildBlockJoinQuery exten
     private final Bits acceptDocs;
 
     private float parentScore;
-    private float parentFreq = 1;
+    private int parentFreq = 1;
 
     private int childDoc = -1;
     private int parentDoc;
@@ -259,7 +259,7 @@ public class ToChildBlockJoinQuery exten
     }
 
     @Override
-    public float freq() throws IOException {
+    public int freq() throws IOException {
       return parentFreq;
     }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Fri Nov 23 12:00:32 2012
@@ -330,7 +330,7 @@ public class ToParentBlockJoinCollector 
     }
     
     @Override
-    public float freq() {
+    public int freq() {
       return 1; // TODO: does anything else make sense?... duplicate of grouping's FakeScorer btw?
     }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Fri Nov 23 12:00:32 2012
@@ -220,7 +220,7 @@ public class ToParentBlockJoinQuery exte
     private int parentDoc = -1;
     private int prevParentDoc;
     private float parentScore;
-    private float parentFreq;
+    private int parentFreq;
     private int nextChildDoc;
 
     private int[] pendingChildDocs = new int[5];
@@ -302,11 +302,10 @@ public class ToParentBlockJoinQuery exte
         }
 
         float totalScore = 0;
-        float totalFreq = 0;
         float maxScore = Float.NEGATIVE_INFINITY;
-        float maxFreq = 0;
 
         childDocUpto = 0;
+        parentFreq = 0;
         do {
 
           //System.out.println("  c=" + nextChildDoc);
@@ -320,12 +319,11 @@ public class ToParentBlockJoinQuery exte
           if (scoreMode != ScoreMode.None) {
             // TODO: specialize this into dedicated classes per-scoreMode
             final float childScore = childScorer.score();
-            final float childFreq = childScorer.freq();
+            final int childFreq = childScorer.freq();
             pendingChildScores[childDocUpto] = childScore;
             maxScore = Math.max(childScore, maxScore);
-            maxFreq = Math.max(childFreq, maxFreq);
             totalScore += childScore;
-            totalFreq += childFreq;
+            parentFreq += childFreq;
           }
           childDocUpto++;
           nextChildDoc = childScorer.nextDoc();
@@ -337,15 +335,12 @@ public class ToParentBlockJoinQuery exte
         switch(scoreMode) {
         case Avg:
           parentScore = totalScore / childDocUpto;
-          parentFreq = totalFreq / childDocUpto;
           break;
         case Max:
           parentScore = maxScore;
-          parentFreq = maxFreq;
           break;
         case Total:
           parentScore = totalScore;
-          parentFreq = totalFreq;
           break;
         case None:
           break;
@@ -367,7 +362,7 @@ public class ToParentBlockJoinQuery exte
     }
     
     @Override
-    public float freq() {
+    public int freq() {
       return parentFreq;
     }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java Fri Nov 23 12:00:32 2012
@@ -273,6 +273,62 @@ public class TestBlockJoin extends Lucen
     dir.close();
   }
 
+  public void testNestedDocScoringWithDeletes() throws Exception {
+    final Directory dir = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(
+        random(),
+        dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT,
+            new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
+
+    // Cannot assert this since we use NoMergePolicy:
+    w.setDoRandomForceMergeAssert(false);
+
+    List<Document> docs = new ArrayList<Document>();
+    docs.add(makeJob("java", 2007));
+    docs.add(makeJob("python", 2010));
+    docs.add(makeResume("Lisa", "United Kingdom"));
+    w.addDocuments(docs);
+
+    docs.clear();
+    docs.add(makeJob("c", 1999));
+    docs.add(makeJob("ruby", 2005));
+    docs.add(makeJob("java", 2006));
+    docs.add(makeResume("Frank", "United States"));
+    w.addDocuments(docs);
+
+    w.commit();
+    IndexSearcher s = newSearcher(DirectoryReader.open(dir));
+
+    ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(
+        NumericRangeQuery.newIntRange("year", 1990, 2010, true, true),
+        new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))),
+        ScoreMode.Total
+    );
+
+    TopDocs topDocs = s.search(q, 10);
+    assertEquals(2, topDocs.totalHits);
+    assertEquals(6, topDocs.scoreDocs[0].doc);
+    assertEquals(3.0f, topDocs.scoreDocs[0].score, 0.0f);
+    assertEquals(2, topDocs.scoreDocs[1].doc);
+    assertEquals(2.0f, topDocs.scoreDocs[1].score, 0.0f);
+
+    s.getIndexReader().close();
+    w.deleteDocuments(new Term("skill", "java"));
+    w.close();
+    s = newSearcher(DirectoryReader.open(dir));
+
+    topDocs = s.search(q, 10);
+    assertEquals(2, topDocs.totalHits);
+    assertEquals(6, topDocs.scoreDocs[0].doc);
+    assertEquals(2.0f, topDocs.scoreDocs[0].score, 0.0f);
+    assertEquals(2, topDocs.scoreDocs[1].doc);
+    assertEquals(1.0f, topDocs.scoreDocs[1].score, 0.0f);
+
+    s.getIndexReader().close();
+    dir.close();
+  }
+
   private String[][] getRandomFields(int maxUniqueValues) {
 
     final String[][] fields = new String[_TestUtil.nextInt(random(), 2, 4)][];

Modified: lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Fri Nov 23 12:00:32 2012
@@ -58,9 +58,19 @@ import org.apache.lucene.search.similari
 import org.apache.lucene.store.RAMDirectory; // for javadocs
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntBlockPool;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+import org.apache.lucene.util.IntBlockPool.SliceReader;
+import org.apache.lucene.util.IntBlockPool.SliceWriter;
 import org.apache.lucene.util.Constants; // for javadocs
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.RecyclingByteBlockAllocator;
+import org.apache.lucene.util.RecyclingIntBlockAllocator;
+
 
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index. 
@@ -191,15 +201,18 @@ public class MemoryIndex {
   /** fields sorted ascending by fieldName; lazily computed on demand */
   private transient Map.Entry<String,Info>[] sortedFields; 
   
-  /** pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] */
-  private final int stride;
+  private final boolean storeOffsets;
   
-  /** Could be made configurable; */
-  private static final float docBoost = 1.0f;
-
   private static final boolean DEBUG = false;
 
+  private final ByteBlockPool byteBlockPool;
+  private final IntBlockPool intBlockPool;
+//  private final IntBlockPool.SliceReader postingsReader;
+  private final IntBlockPool.SliceWriter postingsWriter;
+  
   private HashMap<String,FieldInfo> fieldInfos = new HashMap<String,FieldInfo>();
+
+  private Counter bytesUsed;
   
   /**
    * Sorts term entries into ascending order; also works for
@@ -233,8 +246,26 @@ public class MemoryIndex {
    *            whether or not to store the start and end character offset of
    *            each token term in the text
    */
-  protected MemoryIndex(boolean storeOffsets) {
-    this.stride = storeOffsets ? 3 : 1;
+  public MemoryIndex(boolean storeOffsets) {
+    this(storeOffsets, 0);
+    
+  }
+  
+  /**
+   * Expert: This constructor accepts a byte and int block allocator that is used internally to allocate 
+   * int & byte blocks for term and posting storage.  
+   * @param storeOffsets <code>true</code> if offsets should be stored
+   * @param maxReusedBytes the number of bytes that should remain in the internal memory pools after {@link #reset()} is called
+   */
+  MemoryIndex(boolean storeOffsets, long maxReusedBytes) {
+    this.storeOffsets = storeOffsets;
+    this.bytesUsed = Counter.newCounter();
+    final int maxBufferedByteBlocks = (int)((maxReusedBytes/2) / ByteBlockPool.BYTE_BLOCK_SIZE );
+    final int maxBufferedIntBlocks = (int) ((maxReusedBytes - (maxBufferedByteBlocks*ByteBlockPool.BYTE_BLOCK_SIZE))/(IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT));
+    assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE) + (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT) <= maxReusedBytes;
+    byteBlockPool = new ByteBlockPool(new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed));
+    intBlockPool = new IntBlockPool(new RecyclingIntBlockAllocator(IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
+    postingsWriter = new SliceWriter(intBlockPool);
   }
   
   /**
@@ -265,7 +296,7 @@ public class MemoryIndex {
       throw new RuntimeException(ex);
     }
 
-    addField(fieldName, stream);
+    addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName));
   }
   
   /**
@@ -319,7 +350,7 @@ public class MemoryIndex {
   public void addField(String fieldName, TokenStream stream) {
     addField(fieldName, stream, 1.0f);
   }
-
+  
   /**
    * Iterates over the given token stream and adds the resulting terms to the index;
    * Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
@@ -333,9 +364,32 @@ public class MemoryIndex {
    *            the token stream to retrieve tokens from.
    * @param boost
    *            the boost factor for hits for this field
+   *  
    * @see org.apache.lucene.document.Field#setBoost(float)
    */
   public void addField(String fieldName, TokenStream stream, float boost) {
+    addField(fieldName, stream, boost, 0);
+  }
+
+  /**
+   * Iterates over the given token stream and adds the resulting terms to the index;
+   * Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
+   * Lucene {@link org.apache.lucene.document.Field}.
+   * Finally closes the token stream. Note that untokenized keywords can be added with this method via 
+   * {@link #keywordTokenStream(Collection)}, the Lucene <code>KeywordTokenizer</code> or similar utilities.
+   * 
+   * @param fieldName
+   *            a name to be associated with the text
+   * @param stream
+   *            the token stream to retrieve tokens from.
+   * @param boost
+   *            the boost factor for hits for this field
+   * @param positionIncrementGap
+   *            the position increment gap if fields with the same name are added more than once
+   *
+   * @see org.apache.lucene.document.Field#setBoost(float)
+   */
+  public void addField(String fieldName, TokenStream stream, float boost, int positionIncrementGap) {
     try {
       if (fieldName == null)
         throw new IllegalArgumentException("fieldName must not be null");
@@ -343,24 +397,36 @@ public class MemoryIndex {
           throw new IllegalArgumentException("token stream must not be null");
       if (boost <= 0.0f)
           throw new IllegalArgumentException("boost factor must be greater than 0.0");
-      if (fields.get(fieldName) != null)
-        throw new IllegalArgumentException("field must not be added more than once");
-      
-      HashMap<BytesRef,ArrayIntList> terms = new HashMap<BytesRef,ArrayIntList>();
       int numTokens = 0;
       int numOverlapTokens = 0;
       int pos = -1;
+      final BytesRefHash terms;
+      final SliceByteStartArray sliceArray;
+      Info info = null;
+      long sumTotalTermFreq = 0;
+      if ((info = fields.get(fieldName)) != null) {
+        numTokens = info.numTokens;
+        numOverlapTokens = info.numOverlapTokens;
+        pos = info.lastPosition + positionIncrementGap;
+        terms = info.terms;
+        boost *= info.boost;
+        sliceArray = info.sliceArray;
+        sumTotalTermFreq = info.sumTotalTermFreq;
+      } else {
+        sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
+        terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
+      }
 
       if (!fieldInfos.containsKey(fieldName)) {
         fieldInfos.put(fieldName, 
-            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null, null, null));
+            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
       }
-      
       TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
       BytesRef ref = termAtt.getBytesRef();
       stream.reset();
+      
       while (stream.incrementToken()) {
         termAtt.fillBytesRef();
         if (ref.length == 0) continue; // nothing to do
@@ -370,27 +436,32 @@ public class MemoryIndex {
         if (posIncr == 0)
           numOverlapTokens++;
         pos += posIncr;
-        
-        ArrayIntList positions = terms.get(ref);
-        if (positions == null) { // term not seen before
-          positions = new ArrayIntList(stride);
-          terms.put(BytesRef.deepCopyOf(ref), positions);
+        int ord = terms.add(ref);
+        if (ord < 0) {
+          ord = (-ord) - 1;
+          postingsWriter.reset(sliceArray.end[ord]);
+        } else {
+          sliceArray.start[ord] = postingsWriter.startNewSlice();
         }
-        if (stride == 1) {
-          positions.add(pos);
+        sliceArray.freq[ord]++;
+        sumTotalTermFreq++;
+        if (!storeOffsets) {
+          postingsWriter.writeInt(pos);
         } else {
-          positions.add(pos, offsetAtt.startOffset(), offsetAtt.endOffset());
+          postingsWriter.writeInt(pos);
+          postingsWriter.writeInt(offsetAtt.startOffset());
+          postingsWriter.writeInt(offsetAtt.endOffset());
         }
+        sliceArray.end[ord] = postingsWriter.getCurrentOffset();
       }
       stream.end();
 
       // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
       if (numTokens > 0) {
-        boost = boost * docBoost; // see DocumentWriter.addDocument(...)
-        fields.put(fieldName, new Info(terms, numTokens, numOverlapTokens, boost));
+        fields.put(fieldName, new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, sumTotalTermFreq));
         sortedFields = null;    // invalidate sorted view, if any
       }
-    } catch (IOException e) { // can never happen
+    } catch (Exception e) { // can never happen
       throw new RuntimeException(e);
     } finally {
       try {
@@ -484,10 +555,6 @@ public class MemoryIndex {
     return RamUsageEstimator.sizeOf(this);
   }
 
-  private int numPositions(ArrayIntList positions) {
-    return positions.size() / stride;
-  }
-  
   /** sorts into ascending order (on demand), reusing memory along the way */
   private void sortFields() {
     if (sortedFields == null) sortedFields = sort(fields);
@@ -519,31 +586,50 @@ public class MemoryIndex {
     sortFields();   
     int sumPositions = 0;
     int sumTerms = 0;
-    
+    final BytesRef spare = new BytesRef();
     for (int i=0; i < sortedFields.length; i++) {
       Map.Entry<String,Info> entry = sortedFields[i];
       String fieldName = entry.getKey();
       Info info = entry.getValue();
       info.sortTerms();
       result.append(fieldName + ":\n");
-      
+      SliceByteStartArray sliceArray = info.sliceArray;
       int numPositions = 0;
-      for (int j=0; j < info.sortedTerms.length; j++) {
-        Map.Entry<BytesRef,ArrayIntList> e = info.sortedTerms[j];
-        BytesRef term = e.getKey();
-        ArrayIntList positions = e.getValue();
-        result.append("\t'" + term + "':" + numPositions(positions) + ":");
-        result.append(positions.toString(stride)); // ignore offsets
+      SliceReader postingsReader = new SliceReader(intBlockPool);
+      for (int j=0; j < info.terms.size(); j++) {
+        int ord = info.sortedTerms[j];
+        info.terms.get(ord, spare);
+        int freq = sliceArray.freq[ord];
+        result.append("\t'" + spare + "':" + freq + ":");
+        postingsReader.reset(sliceArray.start[ord], sliceArray.end[ord]);
+        result.append(" [");
+        final int iters = storeOffsets ? 3 : 1; 
+        while(!postingsReader.endOfSlice()) {
+          result.append("(");
+          
+          for (int k = 0; k < iters; k++) {
+            result.append(postingsReader.readInt());
+            if (k < iters-1) {
+              result.append(", ");
+            }
+          }
+          result.append(")");
+          if (!postingsReader.endOfSlice()) {
+            result.append(",");
+          }
+          
+        }
+        result.append("]");
         result.append("\n");
-        numPositions += numPositions(positions);
+        numPositions += freq;
       }
       
-      result.append("\tterms=" + info.sortedTerms.length);
+      result.append("\tterms=" + info.terms.size());
       result.append(", positions=" + numPositions);
       result.append(", memory=" + RamUsageEstimator.humanReadableUnits(RamUsageEstimator.sizeOf(info)));
       result.append("\n");
       sumPositions += numPositions;
-      sumTerms += info.sortedTerms.length;
+      sumTerms += info.terms.size();
     }
     
     result.append("\nfields=" + sortedFields.length);
@@ -563,10 +649,12 @@ public class MemoryIndex {
-     * Term strings and their positions for this field: Map <String
-     * termText, ArrayIntList positions>
-     */
+     * Term strings for this field; per-term postings (positions and optional
+     * offsets) are stored in slices of the shared int block pool.
+     */
-    private final HashMap<BytesRef,ArrayIntList> terms; 
+    private final BytesRefHash terms; 
+    
+    private final SliceByteStartArray sliceArray;
     
     /** Terms sorted ascending by term text; computed on demand */
-    private transient Map.Entry<BytesRef,ArrayIntList>[] sortedTerms;
+    private transient int[] sortedTerms;
     
     /** Number of added tokens for this field */
     private final int numTokens;
@@ -579,16 +667,17 @@ public class MemoryIndex {
 
     private final long sumTotalTermFreq;
 
-    public Info(HashMap<BytesRef,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
+    /** the last position encountered in this field, used when the same field is added multiple times */
+    private int lastPosition;
+
+    public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, long sumTotalTermFreq) {
       this.terms = terms;
+      this.sliceArray = sliceArray; 
       this.numTokens = numTokens;
       this.numOverlapTokens = numOverlapTokens;
       this.boost = boost;
-      long sum = 0;
-      for(Map.Entry<BytesRef,ArrayIntList> ent : terms.entrySet()) {
-        sum += ent.getValue().size();
-      }
-      sumTotalTermFreq = sum;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.lastPosition = lastPosition;
     }
 
     public long getSumTotalTermFreq() {
@@ -604,83 +693,15 @@ public class MemoryIndex {
      * apart from more sophisticated Tries / prefix trees).
      */
     public void sortTerms() {
-      if (sortedTerms == null) sortedTerms = sort(terms);
+      if (sortedTerms == null) 
+        sortedTerms = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
     }
         
     public float getBoost() {
       return boost;
     }
-    
-  }
-  
-  
-  ///////////////////////////////////////////////////////////////////////////////
-  // Nested classes:
-  ///////////////////////////////////////////////////////////////////////////////
-  /**
-   * Efficient resizable auto-expanding list holding <code>int</code> elements;
-   * implemented with arrays.
-   */
-  private static final class ArrayIntList {
-
-    private int[] elements;
-    private int size = 0;
-      
-    public ArrayIntList(int initialCapacity) {
-      elements = new int[initialCapacity];
-    }
-
-    public void add(int elem) {
-      if (size == elements.length) ensureCapacity(size + 1);
-      elements[size++] = elem;
-    }
-
-    public void add(int pos, int start, int end) {
-      if (size + 3 > elements.length) ensureCapacity(size + 3);
-      elements[size] = pos;
-      elements[size+1] = start;
-      elements[size+2] = end;
-      size += 3;
-    }
-
-    public int get(int index) {
-      if (index >= size) throwIndex(index);
-      return elements[index];
-    }
-    
-    public int size() {
-      return size;
-    }
-    
-    private void ensureCapacity(int minCapacity) {
-      int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1);
-      int[] newElements = new int[newCapacity];
-      System.arraycopy(elements, 0, newElements, 0, size);
-      elements = newElements;
-    }
-
-    private void throwIndex(int index) {
-      throw new IndexOutOfBoundsException("index: " + index
-            + ", size: " + size);
-    }
-    
-    /** returns the first few positions (without offsets); debug only */
-    public String toString(int stride) {
-      int s = size() / stride;
-      int len = Math.min(10, s); // avoid printing huge lists
-      StringBuilder buf = new StringBuilder(4*len);
-      buf.append("[");
-      for (int i = 0; i < len; i++) {
-        buf.append(get(i*stride));
-        if (i < len-1) buf.append(", ");
-      }
-      if (len != s) buf.append(", ..."); // and some more...
-      buf.append("]");
-      return buf.toString();
-    }   
   }
   
-  
   ///////////////////////////////////////////////////////////////////////////////
   // Nested classes:
   ///////////////////////////////////////////////////////////////////////////////
@@ -764,7 +785,7 @@ public class MemoryIndex {
 
             @Override
             public long size() {
-              return info.sortedTerms.length;
+              return info.terms.size();
             }
 
             @Override
@@ -775,17 +796,17 @@ public class MemoryIndex {
             @Override
             public long getSumDocFreq() {
               // each term has df=1
-              return info.sortedTerms.length;
+              return info.terms.size();
             }
 
             @Override
             public int getDocCount() {
-              return info.sortedTerms.length > 0 ? 1 : 0;
+              return info.terms.size() > 0 ? 1 : 0;
             }
 
             @Override
             public boolean hasOffsets() {
-              return stride == 3;
+              return storeOffsets;
             }
 
             @Override
@@ -822,48 +843,62 @@ public class MemoryIndex {
         this.info = info;
         info.sortTerms();
       }
+      
+      private final int binarySearch(BytesRef b, BytesRef bytesRef, int low,
+          int high, BytesRefHash hash, int[] ords, Comparator<BytesRef> comparator) {
+        int mid = 0;
+        while (low <= high) {
+          mid = (low + high) >>> 1;
+          hash.get(ords[mid], bytesRef);
+          final int cmp = comparator.compare(bytesRef, b);
+          if (cmp < 0) {
+            low = mid + 1;
+          } else if (cmp > 0) {
+            high = mid - 1;
+          } else {
+            return mid;
+          }
+        }
+        assert comparator.compare(bytesRef, b) != 0;
+        return -(low + 1);
+      }
+    
 
       @Override
       public boolean seekExact(BytesRef text, boolean useCache) {
-        termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
-        if (termUpto >= 0) {
-          br.copyBytes(info.sortedTerms[termUpto].getKey());
-          return true;
-        } else {
-          return false;
-        }
+        termUpto = binarySearch(text, br, 0, info.terms.size()-1, info.terms, info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator());
+        return termUpto >= 0;
       }
 
       @Override
       public SeekStatus seekCeil(BytesRef text, boolean useCache) {
-        termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
+        termUpto = binarySearch(text, br, 0, info.terms.size()-1, info.terms, info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator());
         if (termUpto < 0) { // not found; choose successor
-          termUpto = -termUpto -1;
-          if (termUpto >= info.sortedTerms.length) {
+          termUpto = -termUpto-1;
+          if (termUpto >= info.terms.size()) {
             return SeekStatus.END;
           } else {
-            br.copyBytes(info.sortedTerms[termUpto].getKey());
+            info.terms.get(info.sortedTerms[termUpto], br);
             return SeekStatus.NOT_FOUND;
           }
         } else {
-          br.copyBytes(info.sortedTerms[termUpto].getKey());
           return SeekStatus.FOUND;
         }
       }
 
       @Override
       public void seekExact(long ord) {
-        assert ord < info.sortedTerms.length;
+        assert ord < info.terms.size();
         termUpto = (int) ord;
       }
       
       @Override
       public BytesRef next() {
         termUpto++;
-        if (termUpto >= info.sortedTerms.length) {
+        if (termUpto >= info.terms.size()) {
           return null;
         } else {
-          br.copyBytes(info.sortedTerms[termUpto].getKey());
+          info.terms.get(info.sortedTerms[termUpto], br);
           return br;
         }
       }
@@ -885,7 +920,7 @@ public class MemoryIndex {
 
       @Override
       public long totalTermFreq() {
-        return info.sortedTerms[termUpto].getValue().size();
+        return info.sliceArray.freq[info.sortedTerms[termUpto]];
       }
 
       @Override
@@ -893,7 +928,7 @@ public class MemoryIndex {
         if (reuse == null || !(reuse instanceof MemoryDocsEnum)) {
           reuse = new MemoryDocsEnum();
         }
-        return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sortedTerms[termUpto].getValue());
+        return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sliceArray.freq[info.sortedTerms[termUpto]]);
       }
 
       @Override
@@ -901,7 +936,8 @@ public class MemoryIndex {
         if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) {
           reuse = new MemoryDocsAndPositionsEnum();
         }
-        return ((MemoryDocsAndPositionsEnum) reuse).reset(liveDocs, info.sortedTerms[termUpto].getValue());
+        final int ord = info.sortedTerms[termUpto];
+        return ((MemoryDocsAndPositionsEnum) reuse).reset(liveDocs, info.sliceArray.start[ord], info.sliceArray.end[ord], info.sliceArray.freq[ord]);
       }
 
       @Override
@@ -924,16 +960,16 @@ public class MemoryIndex {
     }
     
     private class MemoryDocsEnum extends DocsEnum {
-      private ArrayIntList positions;
       private boolean hasNext;
       private Bits liveDocs;
       private int doc = -1;
+      private int freq;
 
-      public DocsEnum reset(Bits liveDocs, ArrayIntList positions) {
+      public DocsEnum reset(Bits liveDocs, int freq) {
         this.liveDocs = liveDocs;
-        this.positions = positions;
         hasNext = true;
         doc = -1;
+        this.freq = freq;
         return this;
       }
 
@@ -959,26 +995,35 @@ public class MemoryIndex {
 
       @Override
       public int freq() throws IOException {
-        return positions.size();
+        return freq;
       }
     }
     
     private class MemoryDocsAndPositionsEnum extends DocsAndPositionsEnum {
-      private ArrayIntList positions;
-      private int posUpto;
+      private int posUpto; // for assert
       private boolean hasNext;
       private Bits liveDocs;
       private int doc = -1;
+      private SliceReader sliceReader;
+      private int freq;
+      private int startOffset;
+      private int endOffset;
+      
+      public MemoryDocsAndPositionsEnum() {
+        this.sliceReader = new SliceReader(intBlockPool);
+      }
 
-      public DocsAndPositionsEnum reset(Bits liveDocs, ArrayIntList positions) {
+      public DocsAndPositionsEnum reset(Bits liveDocs, int start, int end, int freq) {
         this.liveDocs = liveDocs;
-        this.positions = positions;
-        posUpto = 0;
+        this.sliceReader.reset(start, end);
+        posUpto = 0; // for assert
         hasNext = true;
         doc = -1;
+        this.freq = freq;
         return this;
       }
 
+
       @Override
       public int docID() {
         return doc;
@@ -1001,22 +1046,31 @@ public class MemoryIndex {
 
       @Override
       public int freq() throws IOException {
-        return positions.size() / stride;
+        return freq;
       }
 
       @Override
       public int nextPosition() {
-        return positions.get(posUpto++ * stride);
+        assert posUpto++ < freq;
+        assert !sliceReader.endOfSlice() : "no more positions to read; storeOffsets=" + storeOffsets;
+        if (storeOffsets) {
+          int pos = sliceReader.readInt();
+          startOffset = sliceReader.readInt();
+          endOffset = sliceReader.readInt();
+          return pos;
+        } else {
+          return sliceReader.readInt();
+        }
       }
 
       @Override
       public int startOffset() {
-        return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 1);
+        return startOffset;
       }
 
       @Override
       public int endOffset() {
-        return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 2);
+        return endOffset;
       }
 
       @Override
@@ -1084,6 +1138,8 @@ public class MemoryIndex {
     
     @Override
     public DocValues normValues(String field) {
+      if (fieldInfos.get(field).omitsNorms())
+        return null;
       DocValues norms = cachedNormValues;
       Similarity sim = getSimilarity();
       if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
@@ -1105,4 +1161,58 @@ public class MemoryIndex {
       return norms;
     }
   }
+  
+  /**
+   * Resets the {@link MemoryIndex} to its initial state and recycles all internal buffers.
+   */
+  public void reset() {
+    this.fieldInfos.clear();
+    this.fields.clear();
+    this.sortedFields = null;
+    byteBlockPool.reset(false, false); // no need to 0-fill the buffers
+    intBlockPool.reset(true, false); // must 0-fill here since we use slices
+  }
+  
+  private static final class SliceByteStartArray extends DirectBytesStartArray {
+    int[] start; // the start offset in the IntBlockPool per term
+    int[] end; // the end pointer in the IntBlockPool for the postings slice per term
+    int[] freq; // the term frequency
+    
+    public SliceByteStartArray(int initSize) {
+      super(initSize);
+    }
+    
+    @Override
+    public int[] init() {
+      final int[] ord = super.init();
+      start = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+      end = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+      freq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+      assert start.length >= ord.length;
+      assert end.length >= ord.length;
+      assert freq.length >= ord.length;
+      return ord;
+    }
+
+    @Override
+    public int[] grow() {
+      final int[] ord = super.grow();
+      if (start.length < ord.length) {
+        start = ArrayUtil.grow(start, ord.length);
+        end = ArrayUtil.grow(end, ord.length);
+        freq = ArrayUtil.grow(freq, ord.length);
+      }      
+      assert start.length >= ord.length;
+      assert end.length >= ord.length;
+      assert freq.length >= ord.length;
+      return ord;
+    }
+
+    @Override
+    public int[] clear() {
+      start = end = null;
+      return super.clear();
+    }
+    
+  }
 }
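
The new addField() path above replaces the per-term ArrayIntList postings with a BytesRefHash plus slices in a shared int block pool, and the new reset() method recycles those pools so one MemoryIndex instance can be reused across many documents. A minimal usage sketch of that reuse pattern, assuming only the public MemoryIndex API shown in this patch (the field name, query term, and helper class are illustrative):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexReuseSketch {
  // Scores each document body against a fixed term query, reusing a single MemoryIndex.
  public static float[] scoreAll(String[] bodies, Analyzer analyzer) {
    MemoryIndex index = new MemoryIndex();               // offsets not stored by default
    TermQuery query = new TermQuery(new Term("body", "lucene"));
    float[] scores = new float[bodies.length];
    for (int i = 0; i < bodies.length; i++) {
      index.addField("body", bodies[i], analyzer);       // tokenize and index in RAM
      scores[i] = index.search(query);                   // 0.0f means no match
      index.reset();                                     // recycle the block pools for the next doc
    }
    return scores;
  }
}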

Modified: lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java Fri Nov 23 12:00:32 2012
@@ -33,12 +33,12 @@ class MemoryIndexNormDocValues extends D
     this.source = source;
   }
   @Override
-  public Source load() throws IOException {
+  protected Source loadSource() throws IOException {
     return source;
   }
 
   @Override
-  public Source getDirectSource() throws IOException {
+  protected Source loadDirectSource() throws IOException {
     return source;
   }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Fri Nov 23 12:00:32 2012
@@ -34,17 +34,26 @@ import org.apache.lucene.codecs.lucene41
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.CompositeReader;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.RegexpQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
@@ -52,8 +61,13 @@ import org.apache.lucene.search.spans.Sp
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.RecyclingByteBlockAllocator;
 import org.apache.lucene.util._TestUtil;
 
 /**
@@ -94,15 +108,18 @@ public class MemoryIndexTest extends Bas
    * runs random tests, up to ITERATIONS times.
    */
   public void testRandomQueries() throws Exception {
-    for (int i = 0; i < ITERATIONS; i++)
-      assertAgainstRAMDirectory();
+    MemoryIndex index = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
+    for (int i = 0; i < ITERATIONS; i++) {
+      assertAgainstRAMDirectory(index);
+    }
   }
-
+  
   /**
    * Build a randomish document for both RAMDirectory and MemoryIndex,
    * and run all the queries against it.
    */
-  public void assertAgainstRAMDirectory() throws Exception {
+  public void assertAgainstRAMDirectory(MemoryIndex memory) throws Exception {
+    memory.reset();
     StringBuilder fooField = new StringBuilder();
     StringBuilder termField = new StringBuilder();
  
@@ -132,7 +149,6 @@ public class MemoryIndexTest extends Bas
     writer.addDocument(doc);
     writer.close();
     
-    MemoryIndex memory = new MemoryIndex();
     memory.addField("foo", fooField.toString(), analyzer);
     memory.addField("term", termField.toString(), analyzer);
     
@@ -144,10 +160,75 @@ public class MemoryIndexTest extends Bas
     } else {
       assertTrue(memory.getMemorySize() > 0L);
     }
-
+    AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
+    DirectoryReader competitor = DirectoryReader.open(ramdir);
+    duellReaders(competitor, reader);
+    IOUtils.close(reader, competitor);
     assertAllQueries(memory, ramdir, analyzer);  
     ramdir.close();    
   }
+
+  private void duellReaders(CompositeReader other, AtomicReader memIndexReader)
+      throws IOException {
+    AtomicReader competitor = new SlowCompositeReaderWrapper(other);
+    Fields memFields = memIndexReader.fields();
+    for (String field : competitor.fields()) {
+      Terms memTerms = memFields.terms(field);
+      Terms iwTerms = memIndexReader.terms(field);
+      if (iwTerms == null) {
+        assertNull(memTerms);
+      } else {
+        DocValues normValues = competitor.normValues(field);
+        DocValues memNormValues = memIndexReader.normValues(field);
+        if (normValues != null) {
+          // mem idx always computes norms on the fly
+          assertNotNull(memNormValues);
+          assertEquals(normValues.getDirectSource().getInt(0), memNormValues.getDirectSource().getInt(0), 0.01);
+        }
+          
+        assertNotNull(memTerms);
+        assertEquals(iwTerms.getDocCount(), memTerms.getDocCount());
+        assertEquals(iwTerms.getSumDocFreq(), memTerms.getSumDocFreq());
+        assertEquals(iwTerms.getSumTotalTermFreq(), memTerms.getSumTotalTermFreq());
+        TermsEnum iwTermsIter = iwTerms.iterator(null);
+        TermsEnum memTermsIter = memTerms.iterator(null);
+        if (iwTerms.hasPositions()) {
+          final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();
+         
+          while(iwTermsIter.next() != null) {
+            assertNotNull(memTermsIter.next());
+            assertEquals(iwTermsIter.term(), memTermsIter.term());
+            DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.docsAndPositions(null, null);
+            DocsAndPositionsEnum memDocsAndPos = memTermsIter.docsAndPositions(null, null);
+            while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
+              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
+              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
+              for (int i = 0; i < iwDocsAndPos.freq(); i++) {
+                assertEquals("term: " + iwTermsIter.term().utf8ToString(), iwDocsAndPos.nextPosition(), memDocsAndPos.nextPosition());
+                if (offsets) {
+                  assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
+                  assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
+                }
+              }
+              
+            }
+            
+          }
+        } else {
+          while(iwTermsIter.next() != null) {
+            assertEquals(iwTermsIter.term(), memTermsIter.term());
+            DocsEnum iwDocsAndPos = iwTermsIter.docs(null, null);
+            DocsEnum memDocsAndPos = memTermsIter.docs(null, null);
+            while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
+              assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
+              assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
+            }
+          }
+        }
+      }
+      
+    }
+  }
   
   /**
    * Run all queries against both the RAMDirectory and MemoryIndex, ensuring they are the same.
@@ -160,7 +241,7 @@ public class MemoryIndexTest extends Bas
     for (String query : queries) {
       TopDocs ramDocs = ram.search(qp.parse(query), 1);
       TopDocs memDocs = mem.search(qp.parse(query), 1);
-      assertEquals(ramDocs.totalHits, memDocs.totalHits);
+      assertEquals(query, ramDocs.totalHits, memDocs.totalHits);
     }
     reader.close();
   }
@@ -202,7 +283,7 @@ public class MemoryIndexTest extends Bas
   
   public void testDocsEnumStart() throws Exception {
     Analyzer analyzer = new MockAnalyzer(random());
-    MemoryIndex memory = new MemoryIndex();
+    MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
     memory.addField("foo", "bar", analyzer);
     AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
     DocsEnum disi = _TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, null, 0);
@@ -220,27 +301,40 @@ public class MemoryIndexTest extends Bas
     reader.close();
   }
   
+  private Allocator randomByteBlockAllocator() {
+    if (random().nextBoolean()) {
+      return new RecyclingByteBlockAllocator();
+    } else {
+      return new ByteBlockPool.DirectAllocator();
+    }
+  }
+  
   public void testDocsAndPositionsEnumStart() throws Exception {
     Analyzer analyzer = new MockAnalyzer(random());
-    MemoryIndex memory = new MemoryIndex(true);
-    memory.addField("foo", "bar", analyzer);
-    AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
-    DocsAndPositionsEnum disi = reader.termPositionsEnum(new Term("foo", "bar"));
-    int docid = disi.docID();
-    assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
-    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-    assertEquals(0, disi.nextPosition());
-    assertEquals(0, disi.startOffset());
-    assertEquals(3, disi.endOffset());
-    
-    // now reuse and check again
-    TermsEnum te = reader.terms("foo").iterator(null);
-    assertTrue(te.seekExact(new BytesRef("bar"), true));
-    disi = te.docsAndPositions(null, disi);
-    docid = disi.docID();
-    assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
-    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-    reader.close();
+    int numIters = atLeast(3);
+    MemoryIndex memory = new MemoryIndex(true, random().nextInt(50) * 1024 * 1024);
+    for (int i = 0; i < numIters; i++) { // check reuse
+      memory.addField("foo", "bar", analyzer);
+      AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
+      assertEquals(1, reader.terms("foo").getSumTotalTermFreq());
+      DocsAndPositionsEnum disi = reader.termPositionsEnum(new Term("foo", "bar"));
+      int docid = disi.docID();
+      assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
+      assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+      assertEquals(0, disi.nextPosition());
+      assertEquals(0, disi.startOffset());
+      assertEquals(3, disi.endOffset());
+      
+      // now reuse and check again
+      TermsEnum te = reader.terms("foo").iterator(null);
+      assertTrue(te.seekExact(new BytesRef("bar"), true));
+      disi = te.docsAndPositions(null, disi);
+      docid = disi.docID();
+      assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
+      assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+      reader.close();
+      memory.reset();
+    }
   }
 
   // LUCENE-3831
@@ -248,7 +342,7 @@ public class MemoryIndexTest extends Bas
     RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
     SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<RegexpQuery>(regex);
         
-    MemoryIndex mindex = new MemoryIndex();
+    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
     mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", new StringReader("hello there")));
 
     // This throws an NPE
@@ -260,10 +354,65 @@ public class MemoryIndexTest extends Bas
     RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
     SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));
 
-    MemoryIndex mindex = new MemoryIndex();
+    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
     mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", new StringReader("hello there")));
 
     // This passes though
     assertEquals(0, mindex.search(wrappedquery), 0.00001f);
   }
+  
+  public void testSameFieldAddedMultipleTimes() throws IOException {
+    MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
+    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
+    mindex.addField("field", "the quick brown fox", mockAnalyzer);
+    mindex.addField("field", "jumps over the", mockAnalyzer);
+    AtomicReader reader = (AtomicReader) mindex.createSearcher().getIndexReader();
+    assertEquals(7, reader.terms("field").getSumTotalTermFreq());
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("field", "fox"));
+    query.add(new Term("field", "jumps"));
+    assertTrue(mindex.search(query) > 0.1);
+    mindex.reset();
+    mockAnalyzer.setPositionIncrementGap(1 + random().nextInt(10));
+    mindex.addField("field", "the quick brown fox", mockAnalyzer);
+    mindex.addField("field", "jumps over the", mockAnalyzer);
+    assertEquals(0, mindex.search(query), 0.00001f);
+    query.setSlop(10);
+    assertTrue("posGap=" + mockAnalyzer.getPositionIncrementGap("field"), mindex.search(query) > 0.0001);
+  }
+  
+  
+  public void testDuellMemIndex() throws IOException {
+    LineFileDocs lineFileDocs = new LineFileDocs(random());
+    int numDocs = atLeast(10);
+    MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
+    for (int i = 0; i < numDocs; i++) {
+      Directory dir = newDirectory();
+      MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
+      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
+      Document nextDoc = lineFileDocs.nextDoc();
+      Document doc = new Document();
+      for (Field field : nextDoc.getFields()) {
+        if (field.fieldType().indexed()) {
+          doc.add(field);
+          if (random().nextInt(3) == 0) {
+            doc.add(field);  // randomly add the same field twice
+          }
+        }
+      }
+      
+      writer.addDocument(doc);
+      writer.close();
+      for (IndexableField field : doc.indexableFields()) {
+        memory.addField(field.name(), ((Field)field).stringValue(), mockAnalyzer);
+      }
+      DirectoryReader competitor = DirectoryReader.open(dir);
+      AtomicReader memIndexReader= (AtomicReader) memory.createSearcher().getIndexReader();
+      duellReaders(competitor, memIndexReader);
+      IOUtils.close(competitor, memIndexReader);
+      memory.reset();
+      dir.close();
+    }
+    lineFileDocs.close();
+  }
 }
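
testSameFieldAddedMultipleTimes above exercises the new lastPosition bookkeeping in Info: a second addField() call on the same field continues from the last recorded position (plus the analyzer's position increment gap), so phrases can span the two fragments. A deterministic sketch of that behavior, assuming any whitespace-splitting Analyzer is passed in (not part of this patch; names are illustrative):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.PhraseQuery;

public class MultiAddFieldSketch {
  public static float phraseAcrossFragments(Analyzer analyzer) {
    MemoryIndex mi = new MemoryIndex();
    mi.addField("field", "the quick brown fox", analyzer);  // positions 0..3
    mi.addField("field", "jumps over the", analyzer);       // continues after "fox", plus any gap
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "fox"));
    query.add(new Term("field", "jumps"));
    query.setSlop(10);            // tolerate a non-zero position increment gap
    return mi.search(query);      // > 0 when "fox jumps" matches within the slop
  }
}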

Modified: lucene/dev/branches/LUCENE-2878/lucene/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java Fri Nov 23 12:00:32 2012
@@ -108,7 +108,7 @@ public class SweetSpotSimilarity extends
    * discountOverlaps is true by default or true for this
    * specific field. */
   @Override
-  public void computeNorm(FieldInvertState state, Norm norm) {
+  public float lengthNorm(FieldInvertState state) {
     final int numTokens;
 
     if (discountOverlaps)
@@ -116,7 +116,7 @@ public class SweetSpotSimilarity extends
     else
       numTokens = state.getLength();
 
-    norm.setByte(encodeNormValue(state.getBoost() * computeLengthNorm(numTokens)));
+    return state.getBoost() * computeLengthNorm(numTokens);
   }
 
   /**
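
The SweetSpotSimilarity change above tracks the trunk API move from computeNorm(FieldInvertState, Norm), which had to encode and set the norm byte itself, to lengthNorm(FieldInvertState), which simply returns the raw float and leaves encoding to the framework. A hedged sketch of a custom similarity under the new contract (not part of this patch; class name and norm policy are illustrative):

import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.similarities.DefaultSimilarity;

public class BoostOnlySimilarity extends DefaultSimilarity {
  @Override
  public float lengthNorm(FieldInvertState state) {
    // ignore field length entirely; keep only the index-time boost
    return state.getBoost();
  }
}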

Modified: lucene/dev/branches/LUCENE-2878/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java Fri Nov 23 12:00:32 2012
@@ -328,7 +328,7 @@ public class CustomScoreQuery extends Qu
     }
 
     @Override
-    public float freq() throws IOException {
+    public int freq() throws IOException {
       return subQueryScorer.freq();
     }
 


