lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1457848 - in /lucene/dev/branches/lucene_solr_4_2: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/ lucene/core/src/java/org/apache/lucene/codecs/lucene42/ lucene/core/src/java/org/apache/lucene/index/ lucene/core/src...
Date Mon, 18 Mar 2013 16:37:52 GMT
Author: rmuir
Date: Mon Mar 18 16:37:51 2013
New Revision: 1457848

URL: http://svn.apache.org/r1457848
Log:
LUCENE-4819: move Sorted[Set]DocValuesTermsEnum to codec

Modified:
    lucene/dev/branches/lucene_solr_4_2/   (props changed)
    lucene/dev/branches/lucene_solr_4_2/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_4_2/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_4_2/lucene/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
    lucene/dev/branches/lucene_solr_4_2/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
    lucene/dev/branches/lucene_solr_4_2/lucene/grouping/   (props changed)
    lucene/dev/branches/lucene_solr_4_2/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
    lucene/dev/branches/lucene_solr_4_2/lucene/test-framework/   (props changed)
    lucene/dev/branches/lucene_solr_4_2/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
    lucene/dev/branches/lucene_solr_4_2/solr/   (props changed)
    lucene/dev/branches/lucene_solr_4_2/solr/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_2/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/CHANGES.txt?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/CHANGES.txt Mon Mar 18 16:37:51 2013
@@ -14,6 +14,14 @@ Bug Fixes
   codecs. The special case of a null context ClassLoader is now also
   supported.  (Christian Kohlschütter, Uwe Schindler)
 
+* LUCENE-4819: seekExact(BytesRef, boolean) did not work correctly with 
+  Sorted[Set]DocValuesTermsEnum.  (Robert Muir)
+
+Optimizations
+
+* LUCENE-4819: Added Sorted[Set]DocValues.termsEnum(), and optimized the
+  default codec for improved enumeration performance.  (Robert Muir)
+
 ======================= Lucene 4.2.0 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java Mon Mar 18 16:37:51 2013
@@ -32,9 +32,7 @@ import org.apache.lucene.index.MultiDocV
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortedSetDocValuesTermsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
@@ -269,7 +267,7 @@ public abstract class DocValuesConsumer 
       SortedDocValues dv = dvs[sub];
       Bits liveDocs = reader.getLiveDocs();
       if (liveDocs == null) {
-        liveTerms[sub] = new SortedDocValuesTermsEnum(dv);
+        liveTerms[sub] = dv.termsEnum();
       } else {
         OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
         for (int i = 0; i < reader.maxDoc(); i++) {
@@ -277,7 +275,7 @@ public abstract class DocValuesConsumer 
             bitset.set(dv.getOrd(i));
           }
         }
-        liveTerms[sub] = new BitsFilteredTermsEnum(new SortedDocValuesTermsEnum(dv), bitset);
+        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
       }
     }
     
@@ -401,7 +399,7 @@ public abstract class DocValuesConsumer 
       SortedSetDocValues dv = dvs[sub];
       Bits liveDocs = reader.getLiveDocs();
       if (liveDocs == null) {
-        liveTerms[sub] = new SortedSetDocValuesTermsEnum(dv);
+        liveTerms[sub] = dv.termsEnum();
       } else {
         OpenBitSet bitset = new OpenBitSet(dv.getValueCount());
         for (int i = 0; i < reader.maxDoc(); i++) {
@@ -413,7 +411,7 @@ public abstract class DocValuesConsumer 
             }
           }
         }
-        liveTerms[sub] = new BitsFilteredTermsEnum(new SortedSetDocValuesTermsEnum(dv), bitset);
+        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
       }
     }
     

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Mon Mar 18 16:37:51 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene4
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -25,6 +26,8 @@ import org.apache.lucene.codecs.CodecUti
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
@@ -32,8 +35,10 @@ import org.apache.lucene.index.NumericDo
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
@@ -285,6 +290,11 @@ class Lucene42DocValuesProducer extends 
       public int getValueCount() {
         return (int)entry.numOrds;
       }
+
+      @Override
+      public TermsEnum termsEnum() {
+        return new FSTTermsEnum(fst);
+      }
     };
   }
   
@@ -369,6 +379,11 @@ class Lucene42DocValuesProducer extends 
       public long getValueCount() {
         return entry.numOrds;
       }
+
+      @Override
+      public TermsEnum termsEnum() {
+        return new FSTTermsEnum(fst);
+      }
     };
   }
 
@@ -396,4 +411,106 @@ class Lucene42DocValuesProducer extends 
     long offset;
     long numOrds;
   }
+  
+  // exposes FSTEnum directly as a TermsEnum: avoids binary-search next()
+  static class FSTTermsEnum extends TermsEnum {
+    final BytesRefFSTEnum<Long> in;
+    
+    // this is all for the complicated seek(ord)...
+    // maybe we should add a FSTEnum that supports this operation?
+    final FST<Long> fst;
+    final FST.BytesReader bytesReader;
+    final Arc<Long> firstArc = new Arc<Long>();
+    final Arc<Long> scratchArc = new Arc<Long>();
+    final IntsRef scratchInts = new IntsRef();
+    final BytesRef scratchBytes = new BytesRef();
+    
+    FSTTermsEnum(FST<Long> fst) {
+      this.fst = fst;
+      in = new BytesRefFSTEnum<Long>(fst);
+      bytesReader = fst.getBytesReader();
+    }
+
+    @Override
+    public BytesRef next() throws IOException {
+      InputOutput<Long> io = in.next();
+      if (io == null) {
+        return null;
+      } else {
+        return io.input;
+      }
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+      if (in.seekCeil(text) == null) {
+        return SeekStatus.END;
+      } else if (term().equals(text)) {
+        // TODO: add SeekStatus to FSTEnum like in https://issues.apache.org/jira/browse/LUCENE-3729
+        // to remove this comparison?
+        return SeekStatus.FOUND;
+      } else {
+        return SeekStatus.NOT_FOUND;
+      }
+    }
+
+    @Override
+    public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
+      if (in.seekExact(text) == null) {
+        return false;
+      } else {
+        return true;
+      }
+    }
+
+    @Override
+    public void seekExact(long ord) throws IOException {
+      // TODO: would be better to make this simpler and faster.
+      // but we don't want to introduce a bug that corrupts our enum state!
+      bytesReader.setPosition(0);
+      fst.getFirstArc(firstArc);
+      IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
+      scratchBytes.bytes = new byte[output.length];
+      scratchBytes.offset = 0;
+      scratchBytes.length = 0;
+      Util.toBytesRef(output, scratchBytes);
+      // TODO: we could do this lazily, better to try to push into FSTEnum though?
+      in.seekExact(scratchBytes);
+    }
+
+    @Override
+    public BytesRef term() throws IOException {
+      return in.current().input;
+    }
+
+    @Override
+    public long ord() throws IOException {
+      return in.current().output;
+    }
+
+    @Override
+    public int docFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long totalTermFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
 }

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java Mon Mar 18 16:37:51 2013
@@ -217,7 +217,7 @@ public class MultiDocValues {
     } else {
       TermsEnum enums[] = new TermsEnum[values.length];
       for (int i = 0; i < values.length; i++) {
-        enums[i] = new SortedDocValuesTermsEnum(values[i]);
+        enums[i] = values[i].termsEnum();
       }
       OrdinalMap mapping = new OrdinalMap(r.getCoreCacheKey(), enums);
       return new MultiSortedDocValues(values, starts, mapping);
@@ -261,7 +261,7 @@ public class MultiDocValues {
     } else {
       TermsEnum enums[] = new TermsEnum[values.length];
       for (int i = 0; i < values.length; i++) {
-        enums[i] = new SortedSetDocValuesTermsEnum(values[i]);
+        enums[i] = values[i].termsEnum();
       }
       OrdinalMap mapping = new OrdinalMap(r.getCoreCacheKey(), enums);
       return new MultiSortedSetDocValues(values, starts, mapping);

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java Mon Mar 18 16:37:51 2013
@@ -114,4 +114,12 @@ public abstract class SortedDocValues ex
 
     return -(low + 1);  // key not found.
   }
+  
+  /** 
+   * Returns a {@link TermsEnum} over the values.
+   * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
+   */
+  public TermsEnum termsEnum() {
+    return new SortedDocValuesTermsEnum(this);
+  }
 }

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java Mon Mar 18 16:37:51 2013
@@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef;
 /** Implements a {@link TermsEnum} wrapping a provided
  * {@link SortedDocValues}. */
 
-public class SortedDocValuesTermsEnum extends TermsEnum {
+class SortedDocValuesTermsEnum extends TermsEnum {
   private final SortedDocValues values;
   private int currentOrd = -1;
   private final BytesRef term = new BytesRef();
@@ -64,6 +64,12 @@ public class SortedDocValuesTermsEnum ex
   public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
     int ord = values.lookupTerm(text);
     if (ord >= 0) {
+      term.offset = 0;
+      // TODO: is there a cleaner way?
+      // term.bytes may be pointing to codec-private byte[]
+      // storage, so we must force new byte[] allocation:
+      term.bytes = new byte[text.length];
+      term.copyBytes(text);
       currentOrd = ord;
       return true;
     } else {

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java Mon Mar 18 16:37:51 2013
@@ -117,4 +117,12 @@ public abstract class SortedSetDocValues
 
     return -(low + 1);  // key not found.
   }
+  
+  /** 
+   * Returns a {@link TermsEnum} over the values.
+   * The enum supports {@link TermsEnum#ord()} and {@link TermsEnum#seekExact(long)}.
+   */
+  public TermsEnum termsEnum() {
+    return new SortedSetDocValuesTermsEnum(this);
+  }
 }

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java Mon Mar 18 16:37:51 2013
@@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef;
 /** Implements a {@link TermsEnum} wrapping a provided
  * {@link SortedSetDocValues}. */
 
-public class SortedSetDocValuesTermsEnum extends TermsEnum {
+class SortedSetDocValuesTermsEnum extends TermsEnum {
   private final SortedSetDocValues values;
   private long currentOrd = -1;
   private final BytesRef term = new BytesRef();
@@ -64,6 +64,12 @@ public class SortedSetDocValuesTermsEnum
   public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
     long ord = values.lookupTerm(text);
     if (ord >= 0) {
+      term.offset = 0;
+      // TODO: is there a cleaner way?
+      // term.bytes may be pointing to codec-private byte[]
+      // storage, so we must force new byte[] allocation:
+      term.bytes = new byte[text.length];
+      term.copyBytes(text);
       currentOrd = ord;
       return true;
     } else {

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java Mon Mar 18 16:37:51 2013
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortedSetDocValuesTermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
@@ -98,7 +97,7 @@ public final class DocTermOrdsRewriteMet
         
         @Override
         public TermsEnum iterator(TermsEnum reuse) {
-          return new SortedSetDocValuesTermsEnum(docTermOrds);
+          return docTermOrds.termsEnum();
         }
 
         @Override

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Mon Mar 18 16:37:51 2013
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
@@ -98,7 +97,7 @@ public final class FieldCacheRewriteMeth
         
         @Override
         public TermsEnum iterator(TermsEnum reuse) {
-          return new SortedDocValuesTermsEnum(fcsi);
+          return fcsi.termsEnum();
         }
 
         @Override

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java Mon Mar 18 16:37:51 2013
@@ -219,7 +219,7 @@ public class TestFieldCache extends Luce
 
     int nTerms = termsIndex.getValueCount();
 
-    TermsEnum tenum = new SortedDocValuesTermsEnum(termsIndex);
+    TermsEnum tenum = termsIndex.termsEnum();
     BytesRef val = new BytesRef();
     for (int i=0; i<nTerms; i++) {
       BytesRef val1 = tenum.next();

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java Mon Mar 18 16:37:51 2013
@@ -24,9 +24,7 @@ import java.util.List;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.SortedSetDocValuesTermsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
@@ -170,7 +168,7 @@ public abstract class TermGroupFacetColl
 
     @Override
     protected SegmentResult createSegmentResult() throws IOException {
-      return new SegmentResult(segmentFacetCounts, segmentTotalCount, new SortedDocValuesTermsEnum(facetFieldTermsIndex), startFacetOrd, endFacetOrd);
+      return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.termsEnum(), startFacetOrd, endFacetOrd);
     }
 
     private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
@@ -289,7 +287,7 @@ public abstract class TermGroupFacetColl
       if (facetFieldNumTerms == 0) {
         facetOrdTermsEnum = null;
       } else {
-        facetOrdTermsEnum = new SortedSetDocValuesTermsEnum(facetFieldDocTermOrds);
+        facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum();
       }
       // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
       segmentFacetCounts = new int[facetFieldNumTerms + 1];

Modified: lucene/dev/branches/lucene_solr_4_2/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Mon Mar 18 16:37:51 2013
@@ -45,6 +45,7 @@ import org.apache.lucene.document.Stored
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldCache;
@@ -702,6 +703,77 @@ public abstract class BaseDocValuesForma
     directory.close();
   }
   
+  public void testSortedTermsEnum() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    
+    doc = new Document();
+    doc.add(new SortedDocValuesField("field", new BytesRef("world")));
+    iwriter.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new SortedDocValuesField("field", new BytesRef("beer")));
+    iwriter.addDocument(doc);
+    iwriter.forceMerge(1);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
+    assertEquals(3, dv.getValueCount());
+    
+    TermsEnum termsEnum = dv.termsEnum();
+    
+    // next()
+    assertEquals("beer", termsEnum.next().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals("world", termsEnum.next().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    
+    // seekCeil()
+    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
+    
+    // seekExact()
+    assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
+    assertEquals(Codec.getDefault().toString(), "hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));
+
+    // seek(ord)
+    termsEnum.seekExact(0);
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    termsEnum.seekExact(1);
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    termsEnum.seekExact(2);
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    ireader.close();
+    directory.close();
+  }
+  
   public void testEmptySortedBytes() throws IOException {
     Analyzer analyzer = new MockAnalyzer(random());
 
@@ -1658,6 +1730,71 @@ public abstract class BaseDocValuesForma
     directory.close();
   }
   
+  public void testSortedSetTermsEnum() throws IOException {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
+    doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
+    iwriter.addDocument(doc);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+
+    SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
+    assertEquals(3, dv.getValueCount());
+    
+    TermsEnum termsEnum = dv.termsEnum();
+    
+    // next()
+    assertEquals("beer", termsEnum.next().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals("hello", termsEnum.next().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals("world", termsEnum.next().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    
+    // seekCeil()
+    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
+    
+    // seekExact()
+    assertTrue(termsEnum.seekExact(new BytesRef("beer"), true));
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("hello"), true));
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    assertTrue(termsEnum.seekExact(new BytesRef("world"), true));
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    assertFalse(termsEnum.seekExact(new BytesRef("bogus"), true));
+
+    // seek(ord)
+    termsEnum.seekExact(0);
+    assertEquals("beer", termsEnum.term().utf8ToString());
+    assertEquals(0, termsEnum.ord());
+    termsEnum.seekExact(1);
+    assertEquals("hello", termsEnum.term().utf8ToString());
+    assertEquals(1, termsEnum.ord());
+    termsEnum.seekExact(2);
+    assertEquals("world", termsEnum.term().utf8ToString());
+    assertEquals(2, termsEnum.ord());
+    ireader.close();
+    directory.close();
+  }
+  
   private void doTestSortedSetVsStoredFields(int minLength, int maxLength) throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));

Modified: lucene/dev/branches/lucene_solr_4_2/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_2/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java?rev=1457848&r1=1457847&r2=1457848&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_2/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java (original)
+++ lucene/dev/branches/lucene_solr_4_2/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java Mon Mar 18 16:37:51 2013
@@ -23,7 +23,6 @@ import java.util.concurrent.*;
 
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedDocValuesTermsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -155,7 +154,7 @@ class PerSegmentSingleValuedFaceting {
           seg.pos = seg.startTermIndex;
         }
         if (seg.pos < seg.endTermIndex) {
-          seg.tenum = new SortedDocValuesTermsEnum(seg.si);
+          seg.tenum = seg.si.termsEnum();
           seg.tenum.seekExact(seg.pos);
           seg.tempBR = seg.tenum.term();
           queue.add(seg);



Mime
View raw message