Was this why I saw strange benchmark results?
On Sun, Nov 22, 2009 at 9:52 AM, <mikemccand@apache.org> wrote:
> Author: mikemccand
> Date: Sun Nov 22 14:52:02 2009
> New Revision: 883088
>
> URL: http://svn.apache.org/viewvc?rev=883088&view=rev
> Log:
> LUCENE-1458 (on flex branch): small optimization to terms dict cache: don't
> store redundant TermRef
>
> Modified:
>
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
>
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
>
> Modified:
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
> URL:
> http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java?rev=883088&r1=883087&r2=883088&view=diff
>
> ==============================================================================
> ---
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
> (original)
> +++
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
> Sun Nov 22 14:52:02 2009
> @@ -36,6 +36,8 @@
> copy(text);
> }
>
> + // nocommit: we could do this w/ UnicodeUtil w/o requiring
> + // allocation of new bytes[]?
> public void copy(String text) {
> try {
> bytes = text.getBytes("UTF-8");
>
> Modified:
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
> URL:
> http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=883088&r1=883087&r2=883088&view=diff
>
> ==============================================================================
> ---
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
> (original)
> +++
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
> Sun Nov 22 14:52:02 2009
> @@ -304,6 +304,7 @@
> public SeekStatus seek(TermRef term) throws IOException {
> ReuseLRUCache<TermRef, CacheEntry> cache = null;
> CacheEntry entry = null;
> + TermRef entryKey = null;
>
> if (docs.canCaptureState()) {
> final ThreadResources resources = getThreadResources();
> @@ -312,7 +313,7 @@
> entry = cache.get(term);
> if (entry != null) {
> docFreq = entry.freq;
> - bytesReader.term.copy(entry.term);
> + bytesReader.term.copy(term);
> docs.setState(entry, docFreq);
> termUpto = entry.termUpTo;
> // nocommit -- would be better to do this lazy?
> @@ -384,16 +385,17 @@
> entry = cache.eldest;
> cache.eldest = null;
> docs.captureState(entry);
> - entry.term.copy(bytesReader.term);
> + entryKey = cache.eldestKey;
> + entryKey.copy(bytesReader.term);
> } else {
> entry = docs.captureState(null);
> - entry.term = (TermRef) bytesReader.term.clone();
> + entryKey = (TermRef) bytesReader.term.clone();
> }
> entry.freq = docFreq;
> entry.termUpTo = termUpto;
> entry.filePointer = in.getFilePointer();
>
> - cache.put(entry.term, entry);
> + cache.put(entryKey, entry);
> }
> return SeekStatus.FOUND;
> } else if (cmp > 0) {
> @@ -517,9 +519,8 @@
>
> // nocommit -- scrutinize API
> public static class CacheEntry {
> - int termUpTo;
> - TermRef term; // nocommit -- really needed?
> - long filePointer;
> + int termUpTo; // ord for this term
> + long filePointer; // fp into the terms dict primary file (_X.tis)
>
> // nocommit -- belongs in Pulsing's CacheEntry class:
> public int freq;
> @@ -563,6 +564,7 @@
> private final static float LOADFACTOR = 0.75f;
> private int cacheSize;
> V eldest;
> + K eldestKey;
>
> /**
> * Creates a least-recently-used cache with the specified size.
> @@ -580,6 +582,7 @@
> boolean remove = size() > ReuseLRUCache.this.cacheSize;
> if (remove) {
> this.eldest = eldest.getValue();
> + this.eldestKey = eldest.getKey();
> }
> return remove;
> }
>
>
>
--
Robert Muir
rcmuir@gmail.com
|