lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject svn commit: r1441379 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/codecs/ lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/ lucene/core/ lucene/core/src/test/org/apache/lucene/codecs/compressing/ lucene/core/src/test/org/apache/luce...
Date Fri, 01 Feb 2013 10:22:04 GMT
Author: jpountz
Date: Fri Feb  1 10:22:04 2013
New Revision: 1441379

URL: http://svn.apache.org/viewvc?rev=1441379&view=rev
Log:
LUCENE-4733: Refactor term vectors formats tests around a BaseTermVectorsFormatTestCase (merged
from r1441367).

Added:
    lucene/dev/branches/branch_4x/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextTermVectorsFormat.java
      - copied unchanged from r1441367, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextTermVectorsFormat.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java
      - copied unchanged from r1441367, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xTermVectorsFormat.java
  (with props)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java
      - copied unchanged from r1441367, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
      - copied unchanged from r1441367, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)

Added: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xTermVectorsFormat.java?rev=1441379&view=auto
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xTermVectorsFormat.java
(added)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene3x/TestLucene3xTermVectorsFormat.java
Fri Feb  1 10:22:04 2013
@@ -0,0 +1,45 @@
+package org.apache.lucene.codecs.lucene3x;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.EnumSet;
+import java.util.Set;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseTermVectorsFormatTestCase;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestLucene3xTermVectorsFormat extends BaseTermVectorsFormatTestCase {
+
+  @Override
+  public void setUp() throws Exception {
+    LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE = true;
+    super.setUp();
+  }
+
+  @Override
+  protected Codec getCodec() {
+    return new PreFlexRWCodec();
+  }
+
+  @Override
+  protected Set<Options> validOptions() {
+    return EnumSet.range(Options.NONE, Options.POSITIONS_AND_OFFSETS);
+  }
+
+}

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java?rev=1441379&r1=1441378&r2=1441379&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java
Fri Feb  1 10:22:04 2013
@@ -17,39 +17,22 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.StringReader;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
 
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.IntField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
-import org.apache.lucene.util._TestUtil;
-
-import com.carrotsearch.randomizedtesting.generators.RandomInts;
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 
 @SuppressCodecs("Lucene3x")
 public class TestPayloadsOnVectors extends LuceneTestCase {
@@ -160,314 +143,5 @@ public class TestPayloadsOnVectors exten
     writer.close();
     dir.close();
   }
-  
-  // custom impl to test cases that are forbidden by the default OffsetAttribute impl
-  static class PermissiveOffsetAttributeImpl extends AttributeImpl implements OffsetAttribute {
-
-    int start, end;
-
-    @Override
-    public int startOffset() {
-      return start;
-    }
-
-    @Override
-    public int endOffset() {
-      return end;
-    }
-
-    @Override
-    public void setOffset(int startOffset, int endOffset) {
-      // no check!
-      start = startOffset;
-      end = endOffset;
-    }
-
-    @Override
-    public void clear() {
-      start = end = 0;
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      if (other == this) {
-        return true;
-      }
-
-      if (other instanceof PermissiveOffsetAttributeImpl) {
-        PermissiveOffsetAttributeImpl o = (PermissiveOffsetAttributeImpl) other;
-        return o.start == start && o.end == end;
-      }
-
-      return false;
-    }
-
-    @Override
-    public int hashCode() {
-      return start + 31 * end;
-    }
-
-    @Override
-    public void copyTo(AttributeImpl target) {
-      OffsetAttribute t = (OffsetAttribute) target;
-      t.setOffset(start, end);
-    }
-
-  }
-
-  static BytesRef randomPayload() {
-    final int len = random().nextInt(5);
-    if (len == 0) {
-      return null;
-    }
-    final BytesRef payload = new BytesRef(len);
-    random().nextBytes(payload.bytes);
-    payload.length = len;
-    return payload;
-  }
-
-  class RandomTokenStream extends TokenStream {
-
-    final String[] terms;
-    final int[] positionsIncrements;
-    final int[] positions;
-    final int[] startOffsets, endOffsets;
-    final BytesRef[] payloads;
-
-    final Map<Integer, Set<Integer>> positionToTerms;
-    final Map<Integer, Set<Integer>> startOffsetToTerms;
 
-    final CharTermAttribute termAtt;
-    final PositionIncrementAttribute piAtt;
-    final OffsetAttribute oAtt;
-    final PayloadAttribute pAtt;
-    int i = 0;
-
-    RandomTokenStream(int len, String[] sampleTerms, boolean weird) {
-      terms = new String[len];
-      positionsIncrements = new int[len];
-      positions = new int[len];
-      startOffsets = new int[len];
-      endOffsets = new int[len];
-      payloads = new BytesRef[len];
-      for (int i = 0; i < len; ++i) {
-        terms[i] = RandomPicks.randomFrom(random(), sampleTerms);
-        if (weird) {
-          positionsIncrements[i] = _TestUtil.nextInt(random(), 1, 1 << 18);
-          startOffsets[i] = random().nextInt();
-          endOffsets[i] = random().nextInt();
-        } else if (i == 0) {
-          positionsIncrements[i] = _TestUtil.nextInt(random(), 1, 1 << 5);
-          startOffsets[i] = _TestUtil.nextInt(random(), 0, 1 << 16);
-          endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
-        } else {
-          positionsIncrements[i] = _TestUtil.nextInt(random(), 0, 1 << 5);
-          startOffsets[i] = startOffsets[i-1] + _TestUtil.nextInt(random(), 0, 1 << 16);
-          endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
-        }
-      }
-      for (int i = 0; i < len; ++i) {
-        if (i == 0) {
-          positions[i] = positionsIncrements[i] - 1;
-        } else {
-          positions[i] = positions[i - 1] + positionsIncrements[i];
-        }
-      }
-      if (rarely()) {
-        Arrays.fill(payloads, randomPayload());
-      } else {
-        for (int i = 0; i < len; ++i) {
-          payloads[i] = randomPayload();
-        }
-      }
-
-      positionToTerms = new HashMap<Integer, Set<Integer>>();
-      startOffsetToTerms = new HashMap<Integer, Set<Integer>>();
-      for (int i = 0; i < len; ++i) {
-        if (!positionToTerms.containsKey(positions[i])) {
-          positionToTerms.put(positions[i], new HashSet<Integer>(1));
-        }
-        positionToTerms.get(positions[i]).add(i);
-        if (!startOffsetToTerms.containsKey(startOffsets[i])) {
-          startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1));
-        }
-        startOffsetToTerms.get(startOffsets[i]).add(i);
-      }
-
-      addAttributeImpl(new PermissiveOffsetAttributeImpl());
-
-      termAtt = addAttribute(CharTermAttribute.class);
-      piAtt = addAttribute(PositionIncrementAttribute.class);
-      oAtt = addAttribute(OffsetAttribute.class);
-      pAtt = addAttribute(PayloadAttribute.class);
-    }
-
-    @Override
-    public final boolean incrementToken() throws IOException {
-      if (i < terms.length) {
-        termAtt.setLength(0).append(terms[i]);
-        piAtt.setPositionIncrement(positionsIncrements[i]);
-        oAtt.setOffset(startOffsets[i], endOffsets[i]);
-        pAtt.setPayload(payloads[i]);
-        ++i;
-        return true;
-      } else {
-        return false;
-      }
-    }
-
-  }
-
-  static FieldType randomFieldType() {
-    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
-    ft.setStoreTermVectors(true);
-    ft.setStoreTermVectorPositions(random().nextBoolean());
-    ft.setStoreTermVectorOffsets(random().nextBoolean());
-    if (random().nextBoolean()) {
-      ft.setStoreTermVectorPositions(true);
-      ft.setStoreTermVectorPayloads(true);
-    }
-    ft.freeze();
-    return ft;
-  }
-
-  public void testRandomVectors() throws IOException {
-    Directory dir = newDirectory();
-    IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
-    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
-    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
-    String[] sampleTerms = new String[RandomInts.randomIntBetween(random(), 20, 50)];
-    for (int i = 0; i < sampleTerms.length; ++i) {
-      sampleTerms[i] = _TestUtil.randomUnicodeString(random());
-    }
-    FieldType ft = randomFieldType();
-    // generate random documents and index them
-    final String[] fieldNames = new String[_TestUtil.nextInt(random(), 1, 200)];
-    for (int i = 0; i < fieldNames.length; ++i) {
-      String fieldName;
-      do {
-        fieldName = _TestUtil.randomSimpleString(random());
-      } while ("id".equals(fieldName));
-      fieldNames[i] = fieldName;
-    }
-    final int numDocs = _TestUtil.nextInt(random(), 10, 100);
-    @SuppressWarnings("unchecked")
-    final Map<String, RandomTokenStream>[] fieldValues  = new Map[numDocs];
-    for (int i = 0; i < numDocs; ++i) {
-      fieldValues[i] = new HashMap<String, RandomTokenStream>();
-      final int numFields = _TestUtil.nextInt(random(), 0, rarely() ? fieldNames.length : 5);
-      for (int j = 0; j < numFields; ++j) {
-        final String fieldName = fieldNames[(i+j*31) % fieldNames.length];
-        final int tokenStreamLen = _TestUtil.nextInt(random(), 1, rarely() ? 300 : 5);
-        fieldValues[i].put(fieldName, new RandomTokenStream(tokenStreamLen, sampleTerms, rarely()));
-      }
-    }
-
-    // index them
-    for (int i = 0; i < numDocs; ++i) {
-      Document doc = new Document();
-      doc.add(new IntField("id", i, Store.YES));
-      for (Map.Entry<String, RandomTokenStream> entry : fieldValues[i].entrySet()) {
-        doc.add(new Field(entry.getKey(), entry.getValue(), ft));
-      }
-      iw.addDocument(doc);
-    }
-
-    iw.commit();
-    // make sure the format can merge
-    iw.forceMerge(2);
-
-    // read term vectors
-    final DirectoryReader reader = DirectoryReader.open(dir);
-    for (int i = 0; i < 100; ++i) {
-      final int docID = random().nextInt(numDocs);
-      final Map<String, RandomTokenStream> fvs = fieldValues[reader.document(docID).getField("id").numericValue().intValue()];
-      final Fields fields = reader.getTermVectors(docID);
-      if (fvs.isEmpty()) {
-        assertNull(fields);
-      } else {
-        Set<String> fns = new HashSet<String>();
-        for (String field : fields) {
-          fns.add(field);
-        }
-        assertEquals(fields.size(), fns.size());
-        assertEquals(fvs.keySet(), fns);
-        for (String field : fields) {
-          final RandomTokenStream tk = fvs.get(field);
-          assert tk != null;
-          final Terms terms = fields.terms(field);
-          assertEquals(ft.storeTermVectorPositions(), terms.hasPositions());
-          assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets());
-          assertEquals(1, terms.getDocCount());
-          final TermsEnum termsEnum = terms.iterator(null);
-          while (termsEnum.next() != null) {
-            assertEquals(1, termsEnum.docFreq());
-            final DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
-            final DocsEnum docsEnum = docsAndPositionsEnum == null ? termsEnum.docs(null, null) : docsAndPositionsEnum;
-            if (ft.storeTermVectorOffsets() || ft.storeTermVectorPositions()) {
-              assertNotNull(docsAndPositionsEnum);
-            }
-            assertEquals(0, docsEnum.nextDoc());
-            if (terms.hasPositions() || terms.hasOffsets()) {
-              final int freq = docsEnum.freq();
-              assertTrue(freq >= 1);
-              if (docsAndPositionsEnum != null) {
-                for (int k = 0; k < freq; ++k) {
-                  final int position = docsAndPositionsEnum.nextPosition();
-                  final Set<Integer> indexes;
-                  if (terms.hasPositions()) {
-                    indexes = tk.positionToTerms.get(position);
-                    assertNotNull(tk.positionToTerms.keySet().toString() + " does not contain " + position, indexes);
-                  } else {
-                    indexes = tk.startOffsetToTerms.get(docsAndPositionsEnum.startOffset());
-                    assertNotNull(indexes);
-                  }
-                  if (terms.hasPositions()) {
-                    boolean foundPosition = false;
-                    for (int index : indexes) {
-                      if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.positions[index] == position) {
-                        foundPosition = true;
-                        break;
-                      }
-                    }
-                    assertTrue(foundPosition);
-                  }
-                  if (terms.hasOffsets()) {
-                    boolean foundOffset = false;
-                    for (int index : indexes) {
-                      if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.startOffsets[index] == docsAndPositionsEnum.startOffset() && tk.endOffsets[index] == docsAndPositionsEnum.endOffset()) {
-                        foundOffset = true;
-                        break;
-                      }
-                    }
-                    assertTrue(foundOffset);
-                  }
-                  if (terms.hasPayloads()) {
-                    boolean foundPayload = false;
-                    for (int index : indexes) {
-                      if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && equals(tk.payloads[index], docsAndPositionsEnum.getPayload())) {
-                        foundPayload = true;
-                        break;
-                      }
-                    }
-                    assertTrue(foundPayload);
-                  }
-                }
-              }
-            }
-            assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
-          }
-        }
-      }
-    }
-    IOUtils.close(reader, iw, dir);
-  }
-
-  private static boolean equals(Object o1, Object o2) {
-    if (o1 == null) {
-      return o2 == null;
-    } else {
-      return o1.equals(o2);
-    }
-  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java?rev=1441379&r1=1441378&r2=1441379&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java
Fri Feb  1 10:22:04 2013
@@ -18,9 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -28,19 +25,24 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.English;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
 public class TestTermVectors extends LuceneTestCase {
-  private static IndexSearcher searcher;
   private static IndexReader reader;
   private static Directory directory;
 
@@ -75,7 +77,6 @@ public class TestTermVectors extends Luc
     }
     reader = writer.getReader();
     writer.close();
-    searcher = newSearcher(reader);
   }
   
   @AfterClass
@@ -84,300 +85,8 @@ public class TestTermVectors extends Luc
     directory.close();
     reader = null;
     directory = null;
-    searcher = null;
   }
 
-  public void test() {
-    assertTrue(searcher != null);
-  }
-
-  public void testTermVectors() throws IOException {
-    Query query = new TermQuery(new Term("field", "seventy"));
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals(100, hits.length);
-      
-    for (int i = 0; i < hits.length; i++) {
-      Fields vectors = searcher.reader.getTermVectors(hits[i].doc);
-      assertNotNull(vectors);
-      assertEquals("doc=" + hits[i].doc + " tv=" + vectors, 1, vectors.size());
-    }
-    Terms vector;
-    vector = searcher.reader.getTermVectors(hits[0].doc).terms("noTV");
-    assertNull(vector);
-  }
-  
-  public void testTermVectorsFieldOrder() throws IOException {
-    Directory dir = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
-    Document doc = new Document();
-    FieldType ft = new FieldType(TextField.TYPE_STORED);
-    ft.setStoreTermVectors(true);
-    ft.setStoreTermVectorOffsets(true);
-    ft.setStoreTermVectorPositions(true);
-    doc.add(newField("c", "some content here", ft));
-    doc.add(newField("a", "some content here", ft));
-    doc.add(newField("b", "some content here", ft));
-    doc.add(newField("x", "some content here", ft));
-    writer.addDocument(doc);
-    IndexReader reader = writer.getReader();
-    writer.close();
-    Fields v = reader.getTermVectors(0);
-    assertEquals(4, v.size());
-    String[] expectedFields = new String[]{"a", "b", "c", "x"};
-    int[] expectedPositions = new int[]{1, 2, 0};
-    Iterator<String> fieldsEnum = v.iterator();
-    for(int i=0;i<expectedFields.length;i++) {
-      assertEquals(expectedFields[i], fieldsEnum.next());
-      assertEquals(3, v.terms(expectedFields[i]).size());
-
-      DocsAndPositionsEnum dpEnum = null;
-      Terms terms = v.terms(expectedFields[i]);
-      assertNotNull(terms);
-      TermsEnum termsEnum = terms.iterator(null);
-      assertEquals("content", termsEnum.next().utf8ToString());
-      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
-      assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-      assertEquals(1, dpEnum.freq());
-      assertEquals(expectedPositions[0], dpEnum.nextPosition());
-
-      assertEquals("here", termsEnum.next().utf8ToString());
-      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
-      assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-      assertEquals(1, dpEnum.freq());
-      assertEquals(expectedPositions[1], dpEnum.nextPosition());
-
-      assertEquals("some", termsEnum.next().utf8ToString());
-      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
-      assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-      assertEquals(1, dpEnum.freq());
-      assertEquals(expectedPositions[2], dpEnum.nextPosition());
-
-      assertNull(termsEnum.next());
-    }
-    reader.close();
-    dir.close();
-  }
-
-  public void testTermPositionVectors() throws IOException {
-    Query query = new TermQuery(new Term("field", "zero"));
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    DocsAndPositionsEnum dpEnum = null;
-    for (int i = 0; i < hits.length; i++) {
-      Fields vectors = searcher.reader.getTermVectors(hits[i].doc);
-      assertNotNull(vectors);
-      assertEquals(1, vectors.size());
-      
-      TermsEnum termsEnum = vectors.terms("field").iterator(null);
-      assertNotNull(termsEnum.next());
-
-      boolean shouldBePosVector = hits[i].doc % 2 == 0;
-      boolean shouldBeOffVector = hits[i].doc % 3 == 0;
-      
-      if (shouldBePosVector || shouldBeOffVector) {
-        while(true) {
-          dpEnum = termsEnum.docsAndPositions(null, dpEnum);
-          assertNotNull(dpEnum);
-          assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-
-          dpEnum.nextPosition();
-
-          if (shouldBeOffVector) {
-            assertTrue(dpEnum.startOffset() != -1);
-            assertTrue(dpEnum.endOffset() != -1);
-          }
-
-          if (termsEnum.next() == null) {
-            break;
-          }
-        }
-      } else {
-        fail();
-      }
-    }
-  }
-  
-  public void testTermOffsetVectors() throws IOException {
-    Query query = new TermQuery(new Term("field", "fifty"));
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals(100, hits.length);
-      
-    for (int i = 0; i < hits.length; i++) {
-      Fields vectors = searcher.reader.getTermVectors(hits[i].doc);
-      assertNotNull(vectors);
-      assertEquals(1, vectors.size());
-    }
-  }
-
-  public void testKnownSetOfDocuments() throws IOException {
-    String test1 = "eating chocolate in a computer lab"; //6 terms
-    String test2 = "computer in a computer lab"; //5 terms
-    String test3 = "a chocolate lab grows old"; //5 terms
-    String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
-    Map<String,Integer> test4Map = new HashMap<String,Integer>();
-    test4Map.put("chocolate", Integer.valueOf(3));
-    test4Map.put("lab", Integer.valueOf(2));
-    test4Map.put("eating", Integer.valueOf(1));
-    test4Map.put("computer", Integer.valueOf(1));
-    test4Map.put("with", Integer.valueOf(1));
-    test4Map.put("a", Integer.valueOf(1));
-    test4Map.put("colored", Integer.valueOf(1));
-    test4Map.put("in", Integer.valueOf(1));
-    test4Map.put("an", Integer.valueOf(1));
-    test4Map.put("computer", Integer.valueOf(1));
-    test4Map.put("old", Integer.valueOf(1));
-    
-    Document testDoc1 = new Document();
-    setupDoc(testDoc1, test1);
-    Document testDoc2 = new Document();
-    setupDoc(testDoc2, test2);
-    Document testDoc3 = new Document();
-    setupDoc(testDoc3, test3);
-    Document testDoc4 = new Document();
-    setupDoc(testDoc4, test4);
-    
-    Directory dir = newDirectory();
-    
-    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, 
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true))
-          .setOpenMode(OpenMode.CREATE)
-          .setMergePolicy(newLogMergePolicy())
-          .setSimilarity(new DefaultSimilarity()));
-    writer.addDocument(testDoc1);
-    writer.addDocument(testDoc2);
-    writer.addDocument(testDoc3);
-    writer.addDocument(testDoc4);
-    IndexReader reader = writer.getReader();
-    writer.close();
-    IndexSearcher knownSearcher = newSearcher(reader);
-    knownSearcher.setSimilarity(new DefaultSimilarity());
-    Fields fields = MultiFields.getFields(knownSearcher.reader);
-    
-    DocsEnum docs = null;
-    for (String fieldName : fields) {
-      Terms terms = fields.terms(fieldName);
-      assertNotNull(terms); // NOTE: kinda sketchy assumptions, but ideally we would fix fieldsenum api...
-      TermsEnum termsEnum = terms.iterator(null);
-
-      while (termsEnum.next() != null) {
-        String text = termsEnum.term().utf8ToString();
-        docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(knownSearcher.reader), docs, DocsEnum.FLAG_FREQS);
-        
-        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
-          int docId = docs.docID();
-          int freq = docs.freq();
-          //System.out.println("Doc Id: " + docId + " freq " + freq);
-          Terms vector = knownSearcher.reader.getTermVectors(docId).terms("field");
-          //float tf = sim.tf(freq);
-          //float idf = sim.idf(knownSearcher.docFreq(term), knownSearcher.maxDoc());
-          //float qNorm = sim.queryNorm()
-          //This is fine since we don't have stop words
-          //float lNorm = sim.lengthNorm("field", vector.getTerms().length);
-          //float coord = sim.coord()
-          //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
-          assertNotNull(vector);
-          TermsEnum termsEnum2 = vector.iterator(null);
-
-          while(termsEnum2.next() != null) {
-            if (text.equals(termsEnum2.term().utf8ToString())) {
-              assertEquals(freq, termsEnum2.totalTermFreq());
-            }
-          }
-        }
-      }
-      //System.out.println("--------");
-    }
-    Query query = new TermQuery(new Term("field", "chocolate"));
-    ScoreDoc[] hits = knownSearcher.search(query, null, 1000).scoreDocs;
-    //doc 3 should be the first hit b/c it is the shortest match
-    assertTrue(hits.length == 3);
-    /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
-      System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
-      System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
-      System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
-      System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " +  hits.doc(2).toString());
-      System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
-    assertTrue(hits[0].doc == 2);
-    assertTrue(hits[1].doc == 3);
-    assertTrue(hits[2].doc == 0);
-    Terms vector = knownSearcher.reader.getTermVectors(hits[1].doc).terms("field");
-    assertNotNull(vector);
-    //System.out.println("Vector: " + vector);
-    assertEquals(10, vector.size());
-    TermsEnum termsEnum = vector.iterator(null);
-    while(termsEnum.next() != null) {
-      String term = termsEnum.term().utf8ToString();
-      //System.out.println("Term: " + term);
-      int freq = (int) termsEnum.totalTermFreq();
-      assertTrue(test4.indexOf(term) != -1);
-      Integer freqInt = test4Map.get(term);
-      assertTrue(freqInt != null);
-      assertEquals(freqInt.intValue(), freq);
-    }
-    reader.close();
-    dir.close();
-  } 
-  
-  private void setupDoc(Document doc, String text)
-  {
-    FieldType ft = new FieldType(TextField.TYPE_STORED);
-    ft.setStoreTermVectors(true);
-    ft.setStoreTermVectorOffsets(true);
-    ft.setStoreTermVectorPositions(true);
-    FieldType ft2 = new FieldType(TextField.TYPE_STORED);
-    ft2.setStoreTermVectors(true);
-    doc.add(newField("field2", text, ft));
-    doc.add(newField("field", text, ft2));
-    //System.out.println("Document: " + doc);
-  }
-
-  // Test only a few docs having vectors
-  public void testRareVectors() throws IOException {
-    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, 
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true))
-        .setOpenMode(OpenMode.CREATE));
-    if (VERBOSE) {
-      System.out.println("TEST: now add non-vectors");
-    }
-    for (int i = 0; i < 100; i++) {
-      Document doc = new Document();
-      doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
-      writer.addDocument(doc);
-    }
-    if (VERBOSE) {
-      System.out.println("TEST: now add vectors");
-    }
-    FieldType ft = new FieldType(TextField.TYPE_STORED);
-    ft.setStoreTermVectors(true);
-    ft.setStoreTermVectorOffsets(true);
-    ft.setStoreTermVectorPositions(true);
-    for(int i=0;i<10;i++) {
-      Document doc = new Document();
-      doc.add(newField("field", English.intToEnglish(100+i), ft));
-      writer.addDocument(doc);
-    }
-
-    if (VERBOSE) {
-      System.out.println("TEST: now getReader");
-    }
-    IndexReader reader = writer.getReader();
-    writer.close();
-    IndexSearcher searcher = newSearcher(reader);
-
-    Query query = new TermQuery(new Term("field", "hundred"));
-    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
-    assertEquals(10, hits.length);
-    for (int i = 0; i < hits.length; i++) {
-
-      Fields vectors = searcher.reader.getTermVectors(hits[i].doc);
-      assertNotNull(vectors);
-      assertEquals(1, vectors.size());
-    }
-    reader.close();
-  }
-
-
   // In a single doc, for the same field, mix the term
   // vectors up
   public void testMixedVectrosVectors() throws IOException {



Mime
View raw message