lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From romseyg...@apache.org
Subject svn commit: r1633321 - in /lucene/dev/trunk/lucene: CHANGES.txt memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
Date Tue, 21 Oct 2014 09:11:52 GMT
Author: romseygeek
Date: Tue Oct 21 09:11:51 2014
New Revision: 1633321

URL: http://svn.apache.org/r1633321
Log:
LUCENE-5911: Remove cacheing of norms, calculate up front in freeze()

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1633321&r1=1633320&r2=1633321&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Oct 21 09:11:51 2014
@@ -71,7 +71,7 @@ New Features
   Robert Muir)
 
 * LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a 
-  MemoryIndex. (Alan Woodward)
+  MemoryIndex. (Alan Woodward, David Smiley, Robert Muir)
 
 * LUCENE-5969: Lucene 5.0 has a new index format with mismatched file detection,
   improved exception handling, and indirect norms encoding for sparse fields.

Modified: lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1633321&r1=1633320&r2=1633321&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
(original)
+++ lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Tue Oct 21 09:11:51 2014
@@ -208,6 +208,8 @@ public class MemoryIndex {
   private Counter bytesUsed;
 
   private boolean frozen = false;
+
+  private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
   
   /**
    * Sorts term entries into ascending order; also works for
@@ -500,6 +502,15 @@ public class MemoryIndex {
   }
 
   /**
+   * Set the Similarity to be used for calculating field norms
+   */
+  public void setSimilarity(Similarity similarity) {
+    if (frozen)
+      throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
+    this.normSimilarity = similarity;
+  }
+
+  /**
    * Creates and returns a searcher that can be used to execute arbitrary
    * Lucene queries and to collect the resulting query results as hits.
    * 
@@ -508,7 +519,7 @@ public class MemoryIndex {
   public IndexSearcher createSearcher() {
     MemoryIndexReader reader = new MemoryIndexReader();
     IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !!
-    reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
+    searcher.setSimilarity(normSimilarity);
     return searcher;
   }
 
@@ -524,6 +535,7 @@ public class MemoryIndex {
     for (Map.Entry<String,Info> info : sortedFields) {
       info.getValue().sortTerms();
     }
+    calculateNormValues();
   }
   
   /**
@@ -744,8 +756,6 @@ public class MemoryIndex {
    */
   private final class MemoryIndexReader extends LeafReader {
     
-    private IndexSearcher searcher; // needed to find searcher.getSimilarity() 
-    
     private MemoryIndexReader() {
       super(); // avoid as much superclass baggage as possible
     }
@@ -1169,15 +1179,6 @@ public class MemoryIndex {
         return null;
       }
     }
-
-    private Similarity getSimilarity() {
-      if (searcher != null) return searcher.getSimilarity();
-      return IndexSearcher.getDefaultSimilarity();
-    }
-    
-    private void setSearcher(IndexSearcher searcher) {
-      this.searcher = searcher;
-    }
   
     @Override
     public int numDocs() {
@@ -1202,33 +1203,35 @@ public class MemoryIndex {
       if (DEBUG) System.err.println("MemoryIndexReader.doClose");
     }
     
-    /** performance hack: cache norms to avoid repeated expensive calculations */
-    private NumericDocValues cachedNormValues;
-    private String cachedFieldName;
-    private Similarity cachedSimilarity;
-    
     @Override
     public NumericDocValues getNormValues(String field) {
-      FieldInfo fieldInfo = fieldInfos.get(field);
-      if (fieldInfo == null || fieldInfo.omitsNorms())
-        return null;
-      NumericDocValues norms = cachedNormValues;
-      Similarity sim = getSimilarity();
-      if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
-        Info info = getInfo(field);
-        int numTokens = info != null ? info.numTokens : 0;
-        int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
-        float boost = info != null ? info.getBoost() : 1.0f; 
-        FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens,
0, boost);
-        long value = sim.computeNorm(invertState);
-        norms = new MemoryIndexNormDocValues(value);
-        // cache it for future reuse
-        cachedNormValues = norms;
-        cachedFieldName = field;
-        cachedSimilarity = sim;
-        if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value +
":" + numTokens);
-      }
-      return norms;
+      if (norms == null)
+        return calculateFieldNormValue(field);
+      return norms.get(field);
+    }
+
+  }
+
+  private Map<String, NumericDocValues> norms = null;
+
+  private NumericDocValues calculateFieldNormValue(String field) {
+    FieldInfo fieldInfo = fieldInfos.get(field);
+    if (fieldInfo == null)
+      return null;
+    Info info = fields.get(field);
+    int numTokens = info != null ? info.numTokens : 0;
+    int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
+    float boost = info != null ? info.getBoost() : 1.0f;
+    FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens,
0, boost);
+    long value = normSimilarity.computeNorm(invertState);
+    if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":"
+ numTokens);
+    return new MemoryIndexNormDocValues(value);
+  }
+
+  private void calculateNormValues() {
+    norms = new HashMap<>();
+    for (String field : fieldInfos.keySet()) {
+      norms.put(field, calculateFieldNormValue(field));
     }
   }
   
@@ -1239,6 +1242,8 @@ public class MemoryIndex {
     this.fieldInfos.clear();
     this.fields.clear();
     this.sortedFields = null;
+    this.norms = null;
+    this.normSimilarity = IndexSearcher.getDefaultSimilarity();
     byteBlockPool.reset(false, false); // no need to 0-fill the buffers
     intBlockPool.reset(true, false); // here must must 0-fill since we use slices
     this.frozen = false;

Modified: lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java?rev=1633321&r1=1633320&r2=1633321&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
(original)
+++ lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
Tue Oct 21 09:11:51 2014
@@ -18,13 +18,20 @@ package org.apache.lucene.index.memory;
  */
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.IOException;
+
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.CoreMatchers.not;
 import static org.junit.internal.matchers.StringContains.containsString;
@@ -63,6 +70,14 @@ public class TestMemoryIndex extends Luc
       assertThat(e.getMessage(), containsString("frozen"));
     }
 
+    try {
+      mi.setSimilarity(new BM25Similarity(1, 1));
+      fail("Expected an IllegalArgumentException when setting the Similarity after calling
freeze()");
+    }
+    catch (RuntimeException e) {
+      assertThat(e.getMessage(), containsString("frozen"));
+    }
+
     assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
 
     mi.reset();
@@ -70,6 +85,32 @@ public class TestMemoryIndex extends Luc
     assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
     assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));
 
+    // check we can set the Similarity again
+    mi.setSimilarity(new DefaultSimilarity());
+
+  }
+
+  @Test
+  public void testSimilarities() throws IOException {
+
+    MemoryIndex mi = new MemoryIndex();
+    mi.addField("f1", "a long text field that contains many many terms", analyzer);
+
+    IndexSearcher searcher = mi.createSearcher();
+    LeafReader reader = (LeafReader) searcher.getIndexReader();
+    float n1 = reader.getNormValues("f1").get(0);
+
+    // Norms aren't cached, so we can change the Similarity
+    mi.setSimilarity(new DefaultSimilarity() {
+      @Override
+      public float lengthNorm(FieldInvertState state) {
+        return 74;
+      }
+    });
+    float n2 = reader.getNormValues("f1").get(0);
+
+    assertTrue(n1 != n2);
+
   }
 
 



Mime
View raw message