lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r825709 - in /lucene/java/branches/flex_1458: ./ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/codecs/ src/java/org/apache/lucene/index/codecs/pulsing/ src/java/org/apache/lucene/index/codecs/sep/ src/java/org/apache/lu...
Date Fri, 16 Oct 2009 00:07:04 GMT
Author: mikemccand
Date: Fri Oct 16 00:07:04 2009
New Revision: 825709

URL: http://svn.apache.org/viewvc?rev=825709&view=rev
Log:
improve starting RAM usage & other fixes

Modified:
    lucene/java/branches/flex_1458/common-build.xml
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java

Modified: lucene/java/branches/flex_1458/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/common-build.xml?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/common-build.xml (original)
+++ lucene/java/branches/flex_1458/common-build.xml Fri Oct 16 00:07:04 2009
@@ -394,7 +394,7 @@
 	      </or></not>
 	    </condition>
 	  	<mkdir dir="@{junit.output.dir}"/>
-	    <junit printsummary="off" haltonfailure="no" maxmemory="1000M"
+	    <junit printsummary="off" haltonfailure="no" maxmemory="512M"
 	      errorProperty="tests.failed" failureProperty="tests.failed">
 	      <classpath refid="@{junit.classpath}"/>
 	      <assertions>

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java Fri Oct 16 00:07:04 2009
@@ -48,9 +48,10 @@
 
   public String next() throws IOException {
 
-    final Term seekTo = new Term(field, "\uFFFF");
-
-    doSeek(seekTo);
+    if (field != null) {
+      final Term seekTo = new Term(field, "\uFFFF");
+      doSeek(seekTo);
+    }
     if (terms.term() != null) {
       String newField = terms.term().field;
       assert !newField.equals(field);

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java Fri Oct 16 00:07:04 2009
@@ -115,18 +115,19 @@
   }
   
   private int hash(byte a[]) {
-    if (a == null)
-        return 0;
+    if (a == null) {
+      return 0;
+    }
     int result = 1;
     int upTo = offset;
-    for(int i = 0; i < length; i++)
-        result = 31 * result + bytes[upTo++];
+    for(int i = 0; i < length; i++) {
+      result = 31 * result + bytes[upTo++];
+    }
     return result;
-}
+  }
 
   @Override
   public boolean equals(Object other) {
-
     return this.termEquals((TermRef) other);
   }
 

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java Fri Oct 16 00:07:04 2009
@@ -24,6 +24,9 @@
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 
+// nocommit -- circular, not clean
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader.CacheEntry;
+
 
 // nocommit -- this is tied to StandarTermsDictWriter;
 // shouldn't it be named StandardDocsProducer?  hmm, though,
@@ -38,7 +41,6 @@
 public abstract class DocsProducer {
   
   public abstract class Reader {
-    public class State {}
     
     public abstract void readTerm(int docFreq, boolean isIndexTerm) throws IOException;
 
@@ -46,9 +48,9 @@
     public abstract DocsEnum docs(Bits deletedDocs) throws IOException;
     
     // nocommit: fooling around with reusable
-    public abstract State captureState(State reusableState);
+    public abstract CacheEntry captureState(CacheEntry reusableState);
     
-    public abstract void setState(State state) throws IOException;
+    public abstract void setState(CacheEntry state, int docFreq) throws IOException;
     
     public boolean canCaptureState() {
       return false;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java Fri Oct 16 00:07:04 2009
@@ -26,6 +26,7 @@
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.DocsProducer;
 import org.apache.lucene.index.codecs.pulsing.PulsingDocsWriter.Document;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader.CacheEntry;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -297,13 +298,13 @@
     }
 
     @Override
-    public State captureState(State reusableState) {
+    public CacheEntry captureState(CacheEntry reusableState) {
       // TODO Auto-generated method stub
       return null;
     }
 
     @Override
-    public void setState(State state) throws IOException {
+    public void setState(CacheEntry state, int docFreq) throws IOException {
       // TODO Auto-generated method stub
       
     }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java Fri Oct 16 00:07:04 2009
@@ -30,6 +30,7 @@
 import org.apache.lucene.index.PositionsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader.CacheEntry;
 
 /** Concrete class that reads the current doc/freq/skip
  *  postings format */
@@ -519,15 +520,14 @@
     }
 
     @Override
-    public State captureState(State reusableState) {
+    public CacheEntry captureState(CacheEntry reusableState) {
       // TODO Auto-generated method stub
       return null;
     }
 
     @Override
-    public void setState(State state) throws IOException {
+    public void setState(CacheEntry state, int docFreq) throws IOException {
       // TODO Auto-generated method stub
-      
     }
   }
 }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java Fri Oct 16 00:07:04 2009
@@ -130,6 +130,8 @@
   int blockUpto;
   int blockOffset;
 
+  // nocommit -- is this big enough, given max allowed term
+  // size (measured in chars!!) ?
   private static final int BYTE_BLOCK_SHIFT = 15;
   private static final int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
   private static final int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java Fri Oct 16 00:07:04 2009
@@ -30,6 +30,7 @@
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.index.codecs.DocsProducer;
+import org.apache.lucene.index.codecs.standard.StandardTermsDictReader.CacheEntry;
 
 /** Concrete class that reads the current doc/freq/skip
  *  postings format */
@@ -114,7 +115,7 @@
     final IndexInput termsIn;
     final FieldInfo fieldInfo;
     long freqOffset;
-    long skipOffset;
+    int skipOffset;
     int docFreq;
 
     // TODO: abstraction violation (we are storing this with
@@ -152,7 +153,7 @@
       }
 
       if (docFreq >= skipInterval) {
-        skipOffset = termsIn.readVLong();
+        skipOffset = termsIn.readVInt();
       } else {
         skipOffset = 0;
       }
@@ -162,54 +163,45 @@
       }
     }
     
-    public class TermDictsReaderState extends State {
-      long termsInPos;
+    public class TermDictsReaderState extends CacheEntry {
       long freqOffset;
-      long skipOffset;
-      long freqInPos;
-      int freq;
-      long proxPos;
-      public long proxOffset;
+      int skipOffset;
+      long proxOffset;
     }
     
     @Override
-    public State captureState(State reusableState) {
+    public CacheEntry captureState(CacheEntry reusableState) {
       TermDictsReaderState state;
-      if(reusableState == null) {
+      if (reusableState == null) {
         state = new TermDictsReaderState();
       } else {
         state = (TermDictsReaderState) reusableState;
-        state.proxPos = 0;
-        state.proxOffset = 0;
       }
-      if(posReader != null) {
-        if(posReader.positions != null) {
-          state.proxPos = posReader.positions.proxIn.getFilePointer();
-        }
+      if (posReader != null) {
         state.proxOffset = posReader.proxOffset;
+      } else {
+        state.proxOffset = 0;
       }
-      state.termsInPos = termsIn.getFilePointer();
       state.freqOffset = freqOffset;
-      state.freqInPos = freqIn.getFilePointer();
-      state.freq = docFreq;
       state.skipOffset = skipOffset;
       return state;
     }
 
     @Override
-    public void setState(State state) throws IOException {
-      TermDictsReaderState readerState = (TermDictsReaderState)state;
+    public void setState(CacheEntry state, int docFreq) throws IOException {
+      TermDictsReaderState readerState = (TermDictsReaderState) state;
       skipOffset = readerState.skipOffset;
-      termsIn.seek(readerState.termsInPos);
       freqOffset = readerState.freqOffset;
-      freqIn.seek(readerState.freqInPos);
-      docFreq = readerState.freq;
+
+      this.docFreq = docFreq;
       
-      if(posReader != null) {
-        if(posReader.positions != null) {
-          posReader.positions.proxIn.seek(readerState.proxPos);
-        }
+      if (posReader != null) {
         posReader.proxOffset = readerState.proxOffset;
+        if (posReader.positions != null) {
+          posReader.positions.seekPending = true;
+          posReader.positions.skipOffset = posReader.proxOffset;
+          posReader.positions.skipPosCount = 0;
+        }
       }
     }
     

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardDocsWriter.java Fri Oct 16 00:07:04 2009
@@ -83,8 +83,9 @@
 
   public void startTerm() {
     freqStart = out.getFilePointer();
-    if (!omitTermFreqAndPositions)
+    if (!omitTermFreqAndPositions) {
       posWriter.startTerm();
+    }
     skipListWriter.resetSkip();
   }
 
@@ -178,7 +179,7 @@
       if (Codec.DEBUG) {
         System.out.println(Thread.currentThread().getName() + ":  writeSkip @ freqFP=" + out.getFilePointer() + " freqStartFP=" + freqStart);
       }
-      termsOut.writeVLong(skipListWriter.writeSkip(out)-freqStart);
+      termsOut.writeVInt((int) (skipListWriter.writeSkip(out)-freqStart));
     }
      
     if (!omitTermFreqAndPositions) {
@@ -194,8 +195,9 @@
   }
 
   public void close() throws IOException {
-    if (Codec.DEBUG)
+    if (Codec.DEBUG) {
       System.out.println("docs writer close pointer=" + out.getFilePointer());
+    }
     try {
       out.close();
     } finally {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=825709&r1=825708&r2=825709&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Fri Oct 16 00:07:04 2009
@@ -39,7 +39,6 @@
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.DocsProducer;
 import org.apache.lucene.index.codecs.FieldsProducer;
-import org.apache.lucene.index.codecs.DocsProducer.Reader.State;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
@@ -103,6 +102,7 @@
           fieldIndexReader = null;
         }
         if (numTerms > 0) {
+          assert !fields.containsKey(fieldInfo.name);
           fields.put(fieldInfo.name, new FieldReader(fieldIndexReader, fieldInfo, numTerms, termsStartPointer));
         }
       }
@@ -191,8 +191,9 @@
   
   private class FieldReader extends Terms {
     private final CloseableThreadLocal threadResources = new CloseableThreadLocal();
+    // nocommit: not needed?
     // nocommit: check placement
-    Collection<ThreadResources> threadResourceSet = new HashSet<ThreadResources>();
+    //Collection<ThreadResources> threadResourceSet = new HashSet<ThreadResources>();
     final long numTerms;
     final FieldInfo fieldInfo;
     final long termsStartPointer;
@@ -206,20 +207,31 @@
       this.indexReader = fieldIndexReader;
     }
 
+    public int docFreq(TermRef text) throws IOException {
+      ThreadResources resources = getThreadResources();
+      if (resources.termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) {
+        return resources.termsEnum.docFreq();
+      } else {
+        return 0;
+      }
+    }
+
     public void close() {
       threadResources.close();
+      // nocommit should not be needed?
+      /*
       for(ThreadResources threadResource : threadResourceSet) {
         threadResource.termInfoCache = null;
       }
+      */
     }
     
-    private ThreadResources getThreadResources() {
-      ThreadResources resources = (ThreadResources)threadResources.get();
+    private ThreadResources getThreadResources() throws IOException {
+      ThreadResources resources = (ThreadResources) threadResources.get();
       if (resources == null) {
-        resources = new ThreadResources();
         // Cache does not have to be thread-safe, it is only used by one thread at the same time
-        resources.termInfoCache = new ReuseLRUCache(1024);
-        threadResourceSet.add(resources);
+        resources = new ThreadResources(new SegmentTermsEnum(), numTerms);
+        //threadResourceSet.add(resources);
         threadResources.set(resources);
       }
       return resources;
@@ -232,7 +244,7 @@
     public long getUniqueTermCount() {
       return numTerms;
     }
-    ThreadResources resources = getThreadResources();
+
     // Iterates through terms in this field
     private class SegmentTermsEnum extends TermsEnum {
       private final IndexInput in;
@@ -242,7 +254,6 @@
       private final DocsProducer.Reader docs;
       private int docFreq;
       private final StandardTermsIndexReader.TermsIndexResult indexResult = new StandardTermsIndexReader.TermsIndexResult();
-
       
       SegmentTermsEnum() throws IOException {
         if (Codec.DEBUG) {
@@ -266,17 +277,19 @@
         CacheEntry entry = null;
 
         if (docs.canCaptureState()) {
-          cache = resources.termInfoCache;
+          final ThreadResources resources = getThreadResources();
+          cache = resources.cache;
 
           entry = (CacheEntry) cache.get(term);
           if (entry != null) {
             docFreq = entry.freq;
-            bytesReader.term = (TermRef) entry.term.clone();
-            docs.setState(entry.state);
+            bytesReader.term.copy(entry.term);
+            docs.setState(entry, docFreq);
             termUpto = entry.termUpTo;
-
+            // nocommit -- would be better to do this lazy?
+            in.seek(entry.filePointer);
             return SeekStatus.FOUND;
-          } 
+          }
         }
         
         // mxx
@@ -290,6 +303,7 @@
           if (Codec.DEBUG) {
             System.out.println(Thread.currentThread().getName() + ":  already here!");
           }
+          // nocommit -- cache this
           return SeekStatus.FOUND;
         }
 
@@ -330,21 +344,25 @@
               System.out.println(Thread.currentThread().getName() + ":  seek done found term=" + bytesReader.term);
               //new Throwable().printStackTrace(System.out);
             }
-        
-            if(docs.canCaptureState() && scanCnt > 1) {
-             if(cache.eldest != null) {
-               entry = (CacheEntry) cache.eldest;
-               cache.eldest = null;
-               entry.state = docs.captureState(entry.state);
+
+            // nocommit -- why scanCnt > 1?
+            //if (docs.canCaptureState() && scanCnt > 1) {
+
+            if (docs.canCaptureState()) {
+              // Store in cache
+              if (cache.eldest != null) {
+                entry = (CacheEntry) cache.eldest;
+                cache.eldest = null;
+                docs.captureState(entry);
+                entry.term.copy((TermRef) bytesReader.term);
               } else {
-                entry = new CacheEntry();
-                entry.state = docs.captureState(null);
+                entry = docs.captureState(null);
+                entry.term = (TermRef) bytesReader.term.clone();
               }
               entry.freq = docFreq;
               entry.termUpTo = termUpto;
+              entry.filePointer = in.getFilePointer();
             
-              entry.term = (TermRef) bytesReader.term.clone();
-             
               cache.put(entry.term, entry);
             }
             return SeekStatus.FOUND;
@@ -461,22 +479,42 @@
     }
   }
 
-  private class CacheEntry {
+  // nocommit -- scrutinize API
+  public static class CacheEntry {
     int termUpTo;
     int freq;
-    State state;
+    long filePointer;
     TermRef term;
   }
+
+  private static final int MAX_CACHE_SIZE = 1024;
   
   /**
    * Per-thread resources managed by ThreadLocal
    */
-  private final class ThreadResources {
+  private static final class ThreadResources {
     // Used for caching the least recently looked-up Terms
-    ReuseLRUCache termInfoCache;
+    final ReuseLRUCache cache;
+    final TermsEnum termsEnum;
+
+    ThreadResources(TermsEnum termsEnum, long numTerms) {
+      final int cacheSize;
+      if (numTerms >= MAX_CACHE_SIZE) {
+        cacheSize = MAX_CACHE_SIZE;
+      } else if (numTerms < 1) {
+        cacheSize = 1;
+      } else {
+        cacheSize = (int) numTerms;
+      }
+
+      cache = new ReuseLRUCache(cacheSize);
+      this.termsEnum = termsEnum;
+    }
   }
-  
-  private class ReuseLRUCache extends LinkedHashMap {
+
+  // nocommit -- wonder if simple double-barrel LRU cache
+  // would be better
+  private static class ReuseLRUCache extends LinkedHashMap {
     
     private final static float LOADFACTOR = 0.75f;
     private int cacheSize;
@@ -486,18 +524,25 @@
      * Creates a last-recently-used cache with the specified size. 
      */
     public ReuseLRUCache(int cacheSize) {
+      // nocommit -- we should not init cache w/ full
+      // capacity?  init it at 0, and only start evicting
+      // once #entries is over our max
       super((int) Math.ceil(cacheSize/ LOADFACTOR) + 1, LOADFACTOR, true);
       this.cacheSize = cacheSize;
     }
     
     protected boolean removeEldestEntry(Map.Entry eldest) {
       boolean remove = size() > ReuseLRUCache.this.cacheSize;
-      if(remove) {
+      if (remove) {
         this.eldest = eldest.getValue();
       } 
       return remove;
     }
-    
+
+    // nocommit -- not needed?  we don't need to sync since
+    // only one thread works with this?
+
+    /*
     @Override
     public synchronized Object put(Object key, Object value) {
       // TODO Auto-generated method stub
@@ -509,6 +554,7 @@
       // TODO Auto-generated method stub
       return super.get(key);
     }
+    */
   }
 
 }



Mime
View raw message