lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r887672 - in /lucene/java/branches/flex_1458/src: java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/preflex/ test/org/apache/lucene/index/
Date Sun, 06 Dec 2009 11:56:13 GMT
Author: mikemccand
Date: Sun Dec  6 11:56:12 2009
New Revision: 887672

URL: http://svn.apache.org/viewvc?rev=887672&view=rev
Log:
LUCENE-2112 (on flex branch): fix bugs in 'flex on non-flex external reader' emulation

Added:
    lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlex.java   (with props)
    lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlexExternalReader.java   (with props)
Modified:
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFields.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFields.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFields.java?rev=887672&r1=887671&r2=887672&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFields.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFields.java Sun Dec  6 11:56:12 2009
@@ -24,7 +24,6 @@
  *  core. */
 class LegacyFields extends Fields {
   private final IndexReader r;
-  private TermEnum terms;
 
   public LegacyFields(IndexReader r) throws IOException {
     this.r = r;
@@ -37,7 +36,6 @@
 
   @Override
   public Terms terms(String field) throws IOException {
-    // nocommit
     return new LegacyTerms(r, field);
   }
 }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java?rev=887672&r1=887671&r2=887672&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java Sun Dec  6 11:56:12 2009
@@ -65,6 +65,7 @@
     private final String field;
     private TermEnum terms;
     private TermRef current;
+    private final TermRef tr = new TermRef();
 
     LegacyTermsEnum(IndexReader r, String field) throws IOException {
       this.r = r;
@@ -80,24 +81,27 @@
 
     @Override
     public SeekStatus seek(TermRef text) throws IOException {
-
-      // nocommit: too slow?
+      
+      // nocommit -- should we optimize for "silly seek"
+      // cases, here?  ie seek to term you're already on, to
+      // very next term , etc.
       terms.close();
       terms = r.terms(new Term(field, text.toString()));
+
       final Term t = terms.term();
       if (t == null) {
         current = null;
         return SeekStatus.END;
-      } else {
-        final TermRef tr = new TermRef(t.text());
+      } else if (t.field() == field) {
+        tr.copy(t.text());
+        current = tr;
         if (text.termEquals(tr)) {
-          current = tr;
           return SeekStatus.FOUND;
         } else {
-          // nocommit reuse TermRef instance
-          current = tr;
           return SeekStatus.NOT_FOUND;
         }
+      } else {
+        return SeekStatus.END;
       }
     }
 
@@ -114,8 +118,12 @@
     @Override
     public TermRef next() throws IOException {
       if (terms.next()) {
-        // nocommit -- reuse TermRef instance
-        current = new TermRef(terms.term().text());
+        if (terms.term().field == field) {
+          tr.copy(terms.term().text());
+          current = tr;
+        } else {
+          current = null;
+        }
         return current;
       } else {
         current = null;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java?rev=887672&r1=887671&r2=887672&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java Sun Dec  6 11:56:12 2009
@@ -17,9 +17,10 @@
  * limitations under the License.
  */
 
-
 import java.io.IOException;
 
+import org.apache.lucene.util.StringHelper;
+
 /** Implements flex API (FieldsEnum/TermsEnum) on top of
  *  pre-flex API.  Used only for IndexReader impls outside
  *  Lucene's core. */
@@ -30,7 +31,7 @@
 
   LegacyTerms(IndexReader r, String field) {
     this.r = r;
-    this.field = field;
+    this.field = StringHelper.intern(field);
   }
 
   @Override

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java?rev=887672&r1=887671&r2=887672&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java Sun Dec  6 11:56:12 2009
@@ -38,8 +38,6 @@
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.util.cache.Cache;
-import org.apache.lucene.util.cache.SimpleLRUCache;
 import org.apache.lucene.index.codecs.Codecs;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.preflex.PreFlexFields;
@@ -923,31 +921,6 @@
       return new LegacyTermPositions();
   }
 
-  private final CloseableThreadLocal perThread = new CloseableThreadLocal();
-
-  // nocommit -- move term vectors under here
-  private static final class PerThread {
-    LegacyTermEnum terms;
-    
-    // Used for caching the least recently looked-up Terms
-    Cache termsCache;
-  }
-
-  private final static int DEFAULT_TERMS_CACHE_SIZE = 1024;
-
-  private PerThread getPerThread() throws IOException {
-    PerThread resources = (PerThread) perThread.get();
-    if (resources == null) {
-      resources = new PerThread();
-      resources.terms = new LegacyTermEnum(null);
-      // Cache does not have to be thread-safe, it is only used by one thread at the same time
-      resources.termsCache = new SimpleLRUCache(DEFAULT_TERMS_CACHE_SIZE);
-      perThread.set(resources);
-    }
-    return resources;
-  }
-
-  
   @Override
   public int docFreq(Term t) throws IOException {
     ensureOpen();
@@ -1354,13 +1327,14 @@
     TermRef currentTerm;
 
     public LegacyTermEnum(Term t) throws IOException {
-      //System.out.println("sr.lte.init: term=" + t);
+      // System.out.println("sr.lte.init: term=" + t);
       fields = core.fields.iterator();
       currentField = fields.next();
       if (currentField == null) {
+        // no fields
         done = true;
       } else if (t != null) {
-        // Pre-seek
+        // Pre-seek to this term
 
         // nocommit -- inefficient; do we need
         // FieldsEnum.seek? (but this is slow only for
@@ -1375,29 +1349,43 @@
         }
 
         if (!done) {
-          if (currentField == t.field) {
-            // Field matches -- get terms
-            terms = fields.terms();
+          // We found some field -- get its terms:
+          terms = fields.terms();
+
+          if (currentField.equals(t.field)) {
+            // We found exactly the requested field; now
+            // seek the term text:
             String text = t.text();
             TermRef tr;
+
             // this is a hack only for backwards compatibility.
             // previously you could supply a term ending with a lead surrogate,
             // and it would return the next Term.
             // if someone does this, tack on the lowest possible trail surrogate.
             // this emulates the old behavior, and forms "valid UTF-8" unicode.
             if (text.length() > 0 
-                && Character.isHighSurrogate(text.charAt(text.length() - 1)))
+                && Character.isHighSurrogate(text.charAt(text.length() - 1))) {
               tr = new TermRef(t.text() + "\uDC00");
-            else
+            } else {
               tr = new TermRef(t.text());
+            }
             TermsEnum.SeekStatus status = terms.seek(tr);
             if (status == TermsEnum.SeekStatus.END) {
-              // leave currentTerm null
+              // Rollover to the next field
+              terms = null;
+              next();
             } else if (status == TermsEnum.SeekStatus.FOUND) {
+              // Found exactly the term
               currentTerm = tr;
             } else {
+              // Found another term, in this same field
               currentTerm = terms.term();
             }
+          } else {
+            // We didn't find exact field (we found the
+            // following field); advance to first term in
+            // this field
+            next();
           }
         }
       } else {
@@ -1433,7 +1421,8 @@
           // This field still has terms
           return true;
         } else {
-          // Done producing terms from this field
+          // Done producing terms from this field; advance
+          // to next field
           terms = null;
         }
       }
@@ -1441,10 +1430,8 @@
 
     @Override
     public Term term() {
-      if (terms != null && !done) {
-        if (currentTerm != null) {
-          return new Term(currentField, currentTerm.toString());
-        }
+      if (!done && terms != null && currentTerm != null) {
+        return new Term(currentField, currentTerm.toString());
       }
       return null;
     }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=887672&r1=887671&r2=887672&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Sun Dec  6 11:56:12 2009
@@ -58,6 +58,7 @@
   private final int readBufferSize;
   private Directory cfsReader;
 
+  // nocommit -- we need the legacy terms cache back in here
  PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor)
     throws IOException {
 
@@ -364,6 +365,7 @@
 
     @Override
     public DocsEnum docs(Bits skipDocs) throws IOException {
+      // nocommit -- reuse?
       return new PreDocsEnum(skipDocs, terms);
     }
   }

Added: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlex.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlex.java?rev=887672&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlex.java (added)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlex.java Sun Dec  6 11:56:12 2009
@@ -0,0 +1,60 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.*;
+import java.util.*;
+import org.apache.lucene.store.*;
+import org.apache.lucene.search.*;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.*;
+import org.apache.lucene.util.*;
+
+public class TestFlex extends LuceneTestCase {
+
+  // Test non-flex API emulated on flex index
+  public void testNonFlex() throws Exception {
+    Directory d = new MockRAMDirectory();
+
+    final int DOC_COUNT = 177;
+
+    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(),
+                                    IndexWriter.MaxFieldLength.UNLIMITED);
+    w.setMaxBufferedDocs(7);
+    Document doc = new Document();
+    doc.add(new Field("field1", "this is field1", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("field2", "this is field2", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("field3", "aaa", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("field4", "bbb", Field.Store.NO, Field.Index.ANALYZED));
+    for(int i=0;i<DOC_COUNT;i++) {
+      w.addDocument(doc);
+    }
+
+    IndexReader r = w.getReader();
+
+    TermEnum terms = r.terms(new Term("field3", "bbb"));
+    // pre-flex API should seek to the next field
+    assertNotNull(terms.term());
+    assertEquals("field4", terms.term().field());
+
+    r.close();
+    w.close();
+    d.close();
+  }
+}
+

Propchange: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlex.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlexExternalReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlexExternalReader.java?rev=887672&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlexExternalReader.java (added)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlexExternalReader.java Sun Dec  6 11:56:12 2009
@@ -0,0 +1,182 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.*;
+import java.util.*;
+import org.apache.lucene.store.*;
+import org.apache.lucene.search.*;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.*;
+import org.apache.lucene.util.*;
+
+public class TestFlexExternalReader extends LuceneTestCase {
+
+  // Delegates to a "normal" IndexReader, making it look
+  // "external", to force testing of the "flex API on
+  // external reader" layer
+  private final static class ExternalReader extends IndexReader {
+    private final IndexReader r;
+    public ExternalReader(IndexReader r) {
+      this.r = r;
+    }
+
+    public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
+      return r.getTermFreqVectors(docNumber);
+    }
+
+    public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
+      return r.getTermFreqVector(docNumber, field);
+    }
+
+    public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
+      r.getTermFreqVector(docNumber, field, mapper);
+    }
+
+    public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
+      r.getTermFreqVector(docNumber, mapper);
+    }
+
+    public int numDocs() {
+      return r.numDocs();
+    }
+
+    public int maxDoc() {
+      return r.maxDoc();
+    }
+
+    public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+      return r.document(n, fieldSelector);
+    }
+
+    public boolean isDeleted(int n) {
+      return r.isDeleted(n);
+    }
+
+    public boolean hasDeletions() {
+      return r.hasDeletions();
+    }
+
+    public byte[] norms(String field) throws IOException {
+      return r.norms(field);
+    }
+
+    public void norms(String field, byte[] bytes, int offset) 
+      throws IOException {
+      r.norms(field, bytes, offset);
+    }
+    
+    protected  void doSetNorm(int doc, String field, byte value)
+      throws CorruptIndexException, IOException {
+      r.doSetNorm(doc, field, value);
+    }
+
+    public TermEnum terms() throws IOException {
+      return r.terms();
+    }
+
+    public TermEnum terms(Term t) throws IOException {
+      return r.terms(t);
+    }
+
+    public int docFreq(Term t) throws IOException {
+      return r.docFreq(t);
+    }
+
+    public TermDocs termDocs() throws IOException {
+      return r.termDocs();
+    }
+
+    public TermPositions termPositions() throws IOException {
+      return r.termPositions();
+    }
+
+    public void doDelete(int docID) throws IOException {
+      r.doDelete(docID);
+    }
+
+    public void doUndeleteAll() throws IOException {
+      r.doUndeleteAll();
+    }
+
+    protected void doCommit(Map<String, String> commitUserData) throws IOException {
+      r.doCommit(commitUserData);
+    }
+
+    protected void doClose() throws IOException {
+      r.doClose();
+    }
+
+    public Collection<String> getFieldNames(FieldOption fldOption) {
+      return r.getFieldNames(fldOption);
+    }
+  }
+
+  public void testExternalReader() throws Exception {
+    Directory d = new MockRAMDirectory();
+
+    final int DOC_COUNT = 177;
+
+    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(),
+                                    IndexWriter.MaxFieldLength.UNLIMITED);
+    w.setMaxBufferedDocs(7);
+    Document doc = new Document();
+    doc.add(new Field("field1", "this is field1", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("field2", "this is field2", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("field3", "aaa", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("field4", "bbb", Field.Store.NO, Field.Index.ANALYZED));
+    for(int i=0;i<DOC_COUNT;i++) {
+      w.addDocument(doc);
+    }
+
+    IndexReader r = new ExternalReader(w.getReader());
+
+    TermRef field1Term = new TermRef("field1");
+    TermRef field2Term = new TermRef("field2");
+
+    assertEquals(DOC_COUNT, r.maxDoc());
+    assertEquals(DOC_COUNT, r.numDocs());
+    assertEquals(DOC_COUNT, r.docFreq(new Term("field1", "field1")));
+    assertEquals(DOC_COUNT, r.docFreq("field1", field1Term));
+
+    Fields fields = r.fields();
+    Terms terms = fields.terms("field1");
+    TermsEnum termsEnum = terms.iterator();
+    assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.seek(field1Term));
+
+    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, termsEnum.seek(field2Term));
+    assertTrue(new TermRef("is").termEquals(termsEnum.term()));
+
+    terms = fields.terms("field2");
+    termsEnum = terms.iterator();
+    assertEquals(TermsEnum.SeekStatus.NOT_FOUND, termsEnum.seek(field1Term));
+    assertTrue(termsEnum.term().termEquals(field2Term));
+
+    assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.seek(field2Term));
+
+    termsEnum = fields.terms("field3").iterator();
+    assertEquals(TermsEnum.SeekStatus.END, termsEnum.seek(new TermRef("bbb")));
+
+    assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.seek(new TermRef("aaa")));
+    assertNull(termsEnum.next());
+
+    r.close();
+    w.close();
+    d.close();
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestFlexExternalReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=887672&r1=887671&r2=887672&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestStressIndexing2.java Sun Dec  6 11:56:12 2009
@@ -28,6 +28,8 @@
 
 import junit.framework.TestCase;
 
+// nocommit -- cut test over to flex API, but not too soon
+// (it catches bugs in emulation)
 public class TestStressIndexing2 extends LuceneTestCase {
   static int maxFields=4;
   static int bigFieldSize=10;



Mime
View raw message