lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1034471 - in /lucene/dev/branches/docvalues/lucene/src: java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/docvalues/ java/org/apache/lucene/index/values/ test/org/apache/lucene/index/values/
Date Fri, 12 Nov 2010 17:07:39 GMT
Author: simonw
Date: Fri Nov 12 17:07:39 2010
New Revision: 1034471

URL: http://svn.apache.org/viewvc?rev=1034471&view=rev
Log:
Split the DocValues TestCase into two and converted some nocommits to TODOs

Added:
    lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
      - copied, changed from r1034464, lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
    lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
  (with props)
Removed:
    lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
Modified:
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1034471&r1=1034470&r2=1034471&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
(original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
Fri Nov 12 17:07:39 2010
@@ -41,7 +41,7 @@ final class DocFieldProcessor extends Do
   final FieldInfos fieldInfos;
   final DocFieldConsumer consumer;
   final StoredFieldsWriter fieldsWriter;
-  final private Map<String,DocValuesConsumer> docValues = new HashMap<String,DocValuesConsumer>();
+  final private Map<String, DocValuesConsumer> docValues = new HashMap<String, DocValuesConsumer>();
   private FieldsConsumer fieldsConsumer; // TODO this should be encapsulated in DocumentsWriter
 
   synchronized DocValuesConsumer docValuesConsumer(Directory dir,

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java?rev=1034471&r1=1034470&r2=1034471&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
(original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/docvalues/DocValuesCodec.java
Fri Nov 12 17:07:39 2010
@@ -129,7 +129,8 @@ public class DocValuesCodec extends Code
       throws IOException {
     Set<String> otherFiles = new HashSet<String>();
     other.files(dir, segmentInfo, otherFiles);
-    for (String string : otherFiles) {
+    for (String string : otherFiles) { // under some circumstances we only write DocValues
+                                       // so other files will be added even if they don't exist
       if (dir.fileExists(string))
         files.add(string);
     }
@@ -141,10 +142,6 @@ public class DocValuesCodec extends Code
         files.add(file);
       }
     }
-    // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
-    // Writer.DATA_EXTENSION));
-    // files.add(IndexFileNames.segmentFileName(segmentInfo.name, "",
-    // Writer.INDEX_EXTENSION));
 
   }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java?rev=1034471&r1=1034470&r2=1034471&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
(original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
Fri Nov 12 17:07:39 2010
@@ -21,7 +21,6 @@ package org.apache.lucene.index.values;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.Comparator;
-import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.index.IndexFileNames;
@@ -59,7 +58,7 @@ public final class Bytes {
   };
 
   
-  // nocommit -- i shouldn't have to specify fixed? can
+  // TODO -- i shouldn't have to specify fixed? can
   // track itself & do the write thing at write time?
   public static Writer getWriter(Directory dir, String id, Mode mode,
       Comparator<BytesRef> comp, boolean fixedSize) throws IOException {
@@ -240,11 +239,12 @@ public final class Bytes {
 
     @Override
     public void files(Collection<String> files) throws IOException {
+      assert datOut != null;
       files.add(IndexFileNames.segmentFileName(id, "",
           IndexFileNames.CSF_DATA_EXTENSION));
-      final String idxFile = IndexFileNames.segmentFileName(id, "",
+      if(idxOut != null) { // called after flush - so this must be initialized if needed or present
+        final String idxFile = IndexFileNames.segmentFileName(id, "",
           IndexFileNames.CSF_INDEX_EXTENSION);
-      if (dir.fileExists(idxFile)) { // TODO is this correct? could be initialized lazy
         files.add(idxFile);
       }
     }
@@ -279,11 +279,11 @@ public final class Bytes {
     }
 
     protected final IndexInput cloneData() {
-      // is never NULL
+      assert datIn != null;
       return (IndexInput) datIn.clone();
     }
 
-    protected final IndexInput cloneIndex() {
+    protected final IndexInput cloneIndex() { // TODO assert here for null rather than return null
       return idxIn == null ? null : (IndexInput) idxIn.clone();
     }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java?rev=1034471&r1=1034470&r2=1034471&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
(original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
Fri Nov 12 17:07:39 2010
@@ -19,8 +19,7 @@ import org.apache.lucene.util.RamUsageEs
  * Exposes writer/reader for floating point values. You can specify 4 (java
  * float) or 8 (java double) byte precision.
  */
-//nocommit - add mmap version
-//nocommti - add bulk copy where possible
+//TODO - add bulk copy where possible
 public class Floats {
   private static final String CODEC_NAME = "SimpleFloats";
   static final int VERSION_START = 0;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java?rev=1034471&r1=1034470&r2=1034471&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
(original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
Fri Nov 12 17:07:39 2010
@@ -41,7 +41,7 @@ class PackedIntsImpl {
   static class IntsWriter extends Writer {
    
 
-    // nocommit - can we bulkcopy this on a merge?
+    // TODO: can we bulkcopy this on a merge?
     private LongsRef intsRef;
     private long[] docToValue;
     private long minValue;

Copied: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
(from r1034464, lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java?p2=lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java&p1=lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java&r1=1034464&r2=1034471&rev=1034471&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestIndexValues.java
(original)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValues.java
Fri Nov 12 17:07:39 2010
@@ -17,67 +17,23 @@ package org.apache.lucene.index.values;
  * limitations under the License.
  */
 
-import java.io.Closeable;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
 import java.util.Comparator;
-import java.util.EnumSet;
-import java.util.List;
 
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.ValuesField;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogDocMergePolicy;
-import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
 import org.apache.lucene.index.values.DocValues.SortedSource;
 import org.apache.lucene.index.values.DocValues.Source;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FloatsRef;
 import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 
-public class TestIndexValues extends LuceneTestCase {
+public class TestDocValues extends LuceneTestCase {
 
-  // TODO test addIndexes
-  private static DocValuesCodec docValuesCodec;
-  private static CodecProvider provider;
-  @BeforeClass
-  public static void beforeClassLuceneTestCaseJ4() {
-    LuceneTestCase.beforeClassLuceneTestCaseJ4();
-    provider = new CodecProvider();
-    docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup(CodecProvider
-        .getDefaultCodec()));
-    provider.register(docValuesCodec);
-    provider.setDefaultFieldCodec(docValuesCodec.name);
-  }
-
-  @AfterClass
-  public static void afterClassLuceneTestCaseJ4() {
-    LuceneTestCase.afterClassLuceneTestCaseJ4();
-  }
+  // TODO -- for sorted test, do our own Sort of the
+  // values and verify it's identical
 
   public void testBytesStraight() throws IOException {
     runTestBytes(Bytes.Mode.STRAIGHT, true);
@@ -94,8 +50,6 @@ public class TestIndexValues extends Luc
     runTestBytes(Bytes.Mode.SORTED, false);
   }
 
-  // nocommit -- for sorted test, do our own Sort of the
-  // values and verify it's identical
   public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize)
       throws IOException {
 
@@ -350,373 +304,15 @@ public class TestIndexValues extends Luc
     runTestFloats(8, 0.0);
   }
 
-  /**
-   * Tests complete indexing of {@link Values} including deletions, merging and
-   * sparse value fields on Compound-File
-   */
-  public void testCFSIndex() throws IOException {
-    // without deletions
-    IndexWriterConfig cfg = writerConfig(true);
-    // primitives - no deletes
-    runTestNumerics(cfg, false);
-
-    cfg = writerConfig(true);
-    // bytes - no deletes
-    runTestIndexBytes(cfg, false);
-
-    // with deletions
-    cfg = writerConfig(true);
-    // primitives
-    runTestNumerics(cfg, true);
-
-    cfg = writerConfig(true);
-    // bytes
-    runTestIndexBytes(cfg, true);
-  }
-
-  /**
-   * Tests complete indexing of {@link Values} including deletions, merging and
-   * sparse value fields on None-Compound-File
-   */
-  public void testIndex() throws IOException {
-    //
-    // without deletions
-    IndexWriterConfig cfg = writerConfig(false);
-    // primitives - no deletes
-    runTestNumerics(cfg, false);
-
-    cfg = writerConfig(false);
-    // bytes - no deletes
-    runTestIndexBytes(cfg, false);
-
-    // with deletions
-    cfg = writerConfig(false);
-    // primitives
-    runTestNumerics(cfg, true);
-
-    cfg = writerConfig(false);
-    // bytes
-    runTestIndexBytes(cfg, true);
-  }
-
-  private IndexWriterConfig writerConfig(boolean useCompoundFile) {
-    final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
-        new MockAnalyzer());
-    MergePolicy mergePolicy = cfg.getMergePolicy();
-    if (mergePolicy instanceof LogMergePolicy) {
-      ((LogMergePolicy) mergePolicy).setUseCompoundFile(useCompoundFile);
-    } else if (useCompoundFile) {
-      LogMergePolicy policy = new LogDocMergePolicy();
-      policy.setUseCompoundFile(useCompoundFile);
-      cfg.setMergePolicy(policy);
-    }
-    cfg.setCodecProvider(provider);
-    return cfg;
-  }
-
-  public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions)
-      throws IOException {
-    Directory d = newDirectory();
-    IndexWriter w = new IndexWriter(d, cfg);
-    final int numValues = 350;
-    final List<Values> numVariantList = new ArrayList<Values>(NUMERICS);
-
-    // run in random order to test if fill works correctly during merges
-    Collections.shuffle(numVariantList, random);
-    for (Values val : numVariantList) {
-      OpenBitSet deleted = indexValues(w, numValues, val, numVariantList,
-          withDeletions, 7);
-      List<Closeable> closeables = new ArrayList<Closeable>();
-      IndexReader r = IndexReader.open(w);
-      final int numRemainingValues = (int) (numValues - deleted.cardinality());
-      final int base = r.numDocs() - numRemainingValues;
-      switch (val) {
-      case PACKED_INTS:
-      case PACKED_INTS_FIXED: {
-        DocValues intsReader = getDocValues(r, val.name());
-        assertNotNull(intsReader);
-
-        Source ints = getSource(intsReader);
-        
-        ValuesEnum intsEnum = intsReader.getEnum();
-        assertNotNull(intsEnum);
-        LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
-        for (int i = 0; i < base; i++) {
-          assertEquals("index " + i, 0, ints.getInt(i));
-          assertEquals(val.name() + " base: " + base + " index: " + i, i,
-              random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc());
-          assertEquals(0, enumRef.get());
-        }
-        int expected = 0;
-        for (int i = base; i < r.numDocs(); i++, expected++) {
-          while (deleted.get(expected)) {
-            expected++;
-          }
-          assertEquals("advance failed at index: " + i + " of " + r.numDocs()
-              + " docs", i, intsEnum.advance(i));
-          assertEquals(expected, enumRef.get());
-          assertEquals(expected, ints.getInt(i));
-
-        }
-      }
-        break;
-      case SIMPLE_FLOAT_4BYTE:
-      case SIMPLE_FLOAT_8BYTE: {
-        DocValues floatReader = getDocValues(r, val.name());
-        assertNotNull(floatReader);
-        Source floats = getSource(floatReader);
-        ValuesEnum floatEnum = floatReader.getEnum();
-        assertNotNull(floatEnum);
-        FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class)
-            .floats();
-
-        for (int i = 0; i < base; i++) {
-          assertEquals(" floats failed for doc: " + i + " base: " + base, 0.0d,
-              floats.getFloat(i), 0.0d);
-          assertEquals(i, random.nextBoolean() ? floatEnum.advance(i)
-              : floatEnum.nextDoc());
-          assertEquals("index " + i, 0.0, enumRef.get(), 0.0);
-        }
-        int expected = 0;
-        for (int i = base; i < r.numDocs(); i++, expected++) {
-          while (deleted.get(expected)) {
-            expected++;
-          }
-          assertEquals("advance failed at index: " + i + " of " + r.numDocs()
-              + " docs base:" + base, i, floatEnum.advance(i));
-          assertEquals("index " + i, 2.0 * expected, enumRef.get(), 0.00001);
-          assertEquals("index " + i, 2.0 * expected, floats.getFloat(i),
-              0.00001);
-        }
-      }
-        break;
-      default:
-        fail("unexpected value " + val);
-      }
-
-      closeables.add(r);
-      for (Closeable toClose : closeables) {
-        toClose.close();
-      }
-    }
-    w.close();
-    d.close();
-  }
-
-  private static EnumSet<Values> BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF,
-      Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT,
-      Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED,
-      Values.BYTES_VAR_STRAIGHT);
-
-  private static EnumSet<Values> NUMERICS = EnumSet.of(Values.PACKED_INTS,
-      Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE,
-      Values.SIMPLE_FLOAT_8BYTE);
-
-  private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
-      Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
-      Index.NO };
-
-  private OpenBitSet indexValues(IndexWriter w, int numValues, Values value,
-      List<Values> valueVarList, boolean withDeletions, int multOfSeven)
-      throws CorruptIndexException, IOException {
-    final boolean isNumeric = NUMERICS.contains(value);
-    OpenBitSet deleted = new OpenBitSet(numValues);
-    Document doc = new Document();
-    Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)];
-    Fieldable field = random.nextBoolean() ? new ValuesField(value.name())
-        : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random,
-            10), idx == Index.NO ? Store.YES : Store.NO, idx);
-    doc.add(field);
-
-    ValuesAttribute valuesAttribute = ValuesField.values(field);
-    valuesAttribute.setType(value);
-    final LongsRef intsRef = valuesAttribute.ints();
-    final FloatsRef floatsRef = valuesAttribute.floats();
-    final BytesRef bytesRef = valuesAttribute.bytes();
-
-    final String idBase = value.name() + "_";
-    final byte[] b = new byte[multOfSeven];
-    if (bytesRef != null) {
-      bytesRef.bytes = b;
-      bytesRef.length = b.length;
-      bytesRef.offset = 0;
-    }
-    // 
-    byte upto = 0;
-    for (int i = 0; i < numValues; i++) {
-      if (isNumeric) {
-        switch (value) {
-        case PACKED_INTS:
-        case PACKED_INTS_FIXED:
-          intsRef.set(i);
-          break;
-        case SIMPLE_FLOAT_4BYTE:
-        case SIMPLE_FLOAT_8BYTE:
-          floatsRef.set(2.0f * i);
-          break;
-        default:
-          fail("unexpected value " + value);
-        }
-      } else {
-        for (int j = 0; j < b.length; j++) {
-          b[j] = upto++;
-        }
-      }
-      doc.removeFields("id");
-      doc.add(new Field("id", idBase + i, Store.YES,
-          Index.NOT_ANALYZED_NO_NORMS));
-      w.addDocument(doc);
-
-      if (i % 7 == 0) {
-        if (withDeletions && random.nextBoolean()) {
-          Values val = valueVarList.get(random.nextInt(1 + valueVarList
-              .indexOf(value)));
-          final int randInt = val == value ? random.nextInt(1 + i) : random
-              .nextInt(numValues);
-          w.deleteDocuments(new Term("id", val.name() + "_" + randInt));
-          if (val == value) {
-            deleted.set(randInt);
-          }
-        }
-        w.commit();
-
-      }
-    }
-    w.commit();
-
-    // TODO test unoptimized with deletions
-    if (withDeletions || random.nextBoolean())
-      w.optimize();
-    return deleted;
-  }
-
-  public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
-      throws CorruptIndexException, LockObtainFailedException, IOException {
-    Directory d = newDirectory();
-    IndexWriter w = new IndexWriter(d, cfg);
-    final List<Values> byteVariantList = new ArrayList<Values>(BYTES);
-    // run in random order to test if fill works correctly during merges
-    Collections.shuffle(byteVariantList, random);
-    final int numValues = 179 + random.nextInt(151);
-    for (Values byteIndexValue : byteVariantList) {
-      List<Closeable> closeables = new ArrayList<Closeable>();
-
-      int bytesSize = 7 + random.nextInt(128);
-      OpenBitSet deleted = indexValues(w, numValues, byteIndexValue,
-          byteVariantList, withDeletions, bytesSize);
-      final IndexReader r = IndexReader.open(w);
-      assertEquals(0, r.numDeletedDocs());
-      final int numRemainingValues = (int) (numValues - deleted.cardinality());
-      final int base = r.numDocs() - numRemainingValues;
-      DocValues bytesReader = getDocValues(r, byteIndexValue.name());
-      assertNotNull("field " + byteIndexValue.name()
-          + " returned null reader - maybe merged failed", bytesReader);
-      Source bytes = getSource(bytesReader);
-      ValuesEnum bytesEnum = bytesReader.getEnum();
-      assertNotNull(bytesEnum);
-      final ValuesAttribute attr = bytesEnum
-          .addAttribute(ValuesAttribute.class);
-      byte upto = 0;
-      // test the filled up slots for correctness
-      for (int i = 0; i < base; i++) {
-        final BytesRef br = bytes.getBytes(i);
-        String msg = " field: " + byteIndexValue.name() + " at index: " + i
-            + " base: " + base + " numDocs:" + r.numDocs();
-        switch (byteIndexValue) {
-        case BYTES_VAR_STRAIGHT:
-        case BYTES_FIXED_STRAIGHT:
-          assertEquals(i, bytesEnum.advance(i));
-          // fixed straight returns bytesref with zero bytes all of fixed
-          // length
-          assertNotNull("expected none null - " + msg, br);
-          if (br.length != 0) {
-            assertEquals("expected zero bytes of length " + bytesSize + " - "
-                + msg, bytesSize, br.length);
-            for (int j = 0; j < br.length; j++) {
-              assertEquals("Byte at index " + j + " doesn't match - " + msg, 0,
-                  br.bytes[br.offset + j]);
-            }
-          }
-          break;
-        case BYTES_VAR_SORTED:
-        case BYTES_FIXED_SORTED:
-        case BYTES_VAR_DEREF:
-        case BYTES_FIXED_DEREF:
-        default:
-          assertNotNull("expected none null - " + msg, br);
-          if (br.length != 0) {
-            bytes.getBytes(i);
-          }
-          assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0,
-              br.length);
-        }
-      }
-      final BytesRef enumRef = attr.bytes();
-
-      // test the actual doc values added in this iteration
-      assertEquals(base + numRemainingValues, r.numDocs());
-      int v = 0;
-      for (int i = base; i < r.numDocs(); i++) {
-
-        String msg = " field: " + byteIndexValue.name() + " at index: " + i
-            + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: "
-            + bytesSize;
-        while (withDeletions && deleted.get(v++)) {
-          upto += bytesSize;
-        }
-
-        BytesRef br = bytes.getBytes(i);
-        if (bytesEnum.docID() != i)
-          assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum
-              .advance(i));
-        for (int j = 0; j < br.length; j++, upto++) {
-          assertEquals(
-              "EnumRef Byte at index " + j + " doesn't match - " + msg, upto,
-              enumRef.bytes[enumRef.offset + j]);
-          assertEquals("SourceRef Byte at index " + j + " doesn't match - "
-              + msg, upto, br.bytes[br.offset + j]);
-        }
-      }
-
-      // clean up
-      closeables.add(r);
-      for (Closeable toClose : closeables) {
-        toClose.close();
-      }
-    }
-
-    w.close();
-    d.close();
-  }
-
-  private DocValues getDocValues(IndexReader reader, String field)
-      throws IOException {
-    boolean optimized = reader.isOptimized();
-    Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields()
-        : MultiFields.getFields(reader);
-    switch (random.nextInt(optimized ? 3 : 2)) { // case 2 only if optimized
-    case 0:
-      return fields.docValues(field);
-    case 1:
-      FieldsEnum iterator = fields.iterator();
-      String name;
-      while ((name = iterator.next()) != null) {
-        if (name.equals(field))
-          return iterator.docValues();
-      }
-      throw new RuntimeException("no such field " + field);
-    case 2:// this only works if we are on an optimized index!
-      return reader.getSequentialSubReaders()[0].docValues(field);
-    }
-    throw new RuntimeException();
-  }
-
   private Source getSource(DocValues values) throws IOException {
     // getSource uses cache internally
     return random.nextBoolean() ? values.load() : values.getSource();
   }
-  private SortedSource getSortedSource(DocValues values, Comparator<BytesRef> comparator) throws IOException {
+
+  private SortedSource getSortedSource(DocValues values,
+      Comparator<BytesRef> comparator) throws IOException {
     // getSortedSource uses cache internally
-    return random.nextBoolean() ? values.loadSorted(comparator) : values.getSortedSorted(comparator);
+    return random.nextBoolean() ? values.loadSorted(comparator) : values
+        .getSortedSorted(comparator);
   }
 }

Added: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java?rev=1034471&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
(added)
+++ lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
Fri Nov 12 17:07:39 2010
@@ -0,0 +1,451 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.List;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.ValuesField;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.docvalues.DocValuesCodec;
+import org.apache.lucene.index.values.DocValues.Source;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util._TestUtil;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/**
+ * 
+ * Tests DocValues integration into IndexWriter and Codecs
+ * 
+ */
+public class TestDocValuesIndexing extends LuceneTestCase {
+  // TODO Add a test for addIndexes
+  // TODO add test for unoptimized case with deletes
+
+  private static DocValuesCodec docValuesCodec;
+  private static CodecProvider provider;
+
+  @BeforeClass
+  public static void beforeClassLuceneTestCaseJ4() {
+    LuceneTestCase.beforeClassLuceneTestCaseJ4();
+    provider = new CodecProvider();
+    docValuesCodec = new DocValuesCodec(CodecProvider.getDefault().lookup(
+        CodecProvider.getDefaultCodec()));
+    provider.register(docValuesCodec);
+    provider.setDefaultFieldCodec(docValuesCodec.name);
+  }
+
+  @AfterClass
+  public static void afterClassLuceneTestCaseJ4() {
+    LuceneTestCase.afterClassLuceneTestCaseJ4();
+  }
+
+  /**
+   * Tests complete indexing of {@link Values} including deletions, merging and
+   * sparse value fields on Compound-File
+   */
+  public void testCFSIndex() throws IOException {
+    // without deletions
+    IndexWriterConfig cfg = writerConfig(true);
+    // primitives - no deletes
+    runTestNumerics(cfg, false);
+
+    cfg = writerConfig(true);
+    // bytes - no deletes
+    runTestIndexBytes(cfg, false);
+
+    // with deletions
+    cfg = writerConfig(true);
+    // primitives
+    runTestNumerics(cfg, true);
+
+    cfg = writerConfig(true);
+    // bytes
+    runTestIndexBytes(cfg, true);
+  }
+
+  /**
+   * Tests complete indexing of {@link Values} including deletions, merging and
+   * sparse value fields on Non-Compound-File
+   */
+  public void testIndex() throws IOException {
+    //
+    // without deletions
+    IndexWriterConfig cfg = writerConfig(false);
+    // primitives - no deletes
+    runTestNumerics(cfg, false);
+
+    cfg = writerConfig(false);
+    // bytes - no deletes
+    runTestIndexBytes(cfg, false);
+
+    // with deletions
+    cfg = writerConfig(false);
+    // primitives
+    runTestNumerics(cfg, true);
+
+    cfg = writerConfig(false);
+    // bytes
+    runTestIndexBytes(cfg, true);
+  }
+
+  private IndexWriterConfig writerConfig(boolean useCompoundFile) {
+    final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer());
+    MergePolicy mergePolicy = cfg.getMergePolicy();
+    if (mergePolicy instanceof LogMergePolicy) {
+      ((LogMergePolicy) mergePolicy).setUseCompoundFile(useCompoundFile);
+    } else if (useCompoundFile) {
+      LogMergePolicy policy = new LogDocMergePolicy();
+      policy.setUseCompoundFile(useCompoundFile);
+      cfg.setMergePolicy(policy);
+    }
+    cfg.setCodecProvider(provider);
+    return cfg;
+  }
+
+  public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions)
+      throws IOException {
+    Directory d = newDirectory();
+    IndexWriter w = new IndexWriter(d, cfg);
+    final int numValues = 350;
+    final List<Values> numVariantList = new ArrayList<Values>(NUMERICS);
+
+    // run in random order to test if fill works correctly during merges
+    Collections.shuffle(numVariantList, random);
+    for (Values val : numVariantList) {
+      OpenBitSet deleted = indexValues(w, numValues, val, numVariantList,
+          withDeletions, 7);
+      List<Closeable> closeables = new ArrayList<Closeable>();
+      IndexReader r = IndexReader.open(w);
+      final int numRemainingValues = (int) (numValues - deleted.cardinality());
+      final int base = r.numDocs() - numRemainingValues;
+      switch (val) {
+      case PACKED_INTS:
+      case PACKED_INTS_FIXED: {
+        DocValues intsReader = getDocValues(r, val.name());
+        assertNotNull(intsReader);
+
+        Source ints = getSource(intsReader);
+
+        ValuesEnum intsEnum = intsReader.getEnum();
+        assertNotNull(intsEnum);
+        LongsRef enumRef = intsEnum.addAttribute(ValuesAttribute.class).ints();
+        for (int i = 0; i < base; i++) {
+          assertEquals("index " + i, 0, ints.getInt(i));
+          assertEquals(val.name() + " base: " + base + " index: " + i, i,
+              random.nextBoolean() ? intsEnum.advance(i) : intsEnum.nextDoc());
+          assertEquals(0, enumRef.get());
+        }
+        int expected = 0;
+        for (int i = base; i < r.numDocs(); i++, expected++) {
+          while (deleted.get(expected)) {
+            expected++;
+          }
+          assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+              + " docs", i, intsEnum.advance(i));
+          assertEquals(expected, enumRef.get());
+          assertEquals(expected, ints.getInt(i));
+
+        }
+      }
+        break;
+      case SIMPLE_FLOAT_4BYTE:
+      case SIMPLE_FLOAT_8BYTE: {
+        DocValues floatReader = getDocValues(r, val.name());
+        assertNotNull(floatReader);
+        Source floats = getSource(floatReader);
+        ValuesEnum floatEnum = floatReader.getEnum();
+        assertNotNull(floatEnum);
+        FloatsRef enumRef = floatEnum.addAttribute(ValuesAttribute.class)
+            .floats();
+
+        for (int i = 0; i < base; i++) {
+          assertEquals(" floats failed for doc: " + i + " base: " + base, 0.0d,
+              floats.getFloat(i), 0.0d);
+          assertEquals(i, random.nextBoolean() ? floatEnum.advance(i)
+              : floatEnum.nextDoc());
+          assertEquals("index " + i, 0.0, enumRef.get(), 0.0);
+        }
+        int expected = 0;
+        for (int i = base; i < r.numDocs(); i++, expected++) {
+          while (deleted.get(expected)) {
+            expected++;
+          }
+          assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+              + " docs base:" + base, i, floatEnum.advance(i));
+          assertEquals("index " + i, 2.0 * expected, enumRef.get(), 0.00001);
+          assertEquals("index " + i, 2.0 * expected, floats.getFloat(i),
+              0.00001);
+        }
+      }
+        break;
+      default:
+        fail("unexpected value " + val);
+      }
+
+      closeables.add(r);
+      for (Closeable toClose : closeables) {
+        toClose.close();
+      }
+    }
+    w.close();
+    d.close();
+  }
+
+  private static EnumSet<Values> BYTES = EnumSet.of(Values.BYTES_FIXED_DEREF,
+      Values.BYTES_FIXED_SORTED, Values.BYTES_FIXED_STRAIGHT,
+      Values.BYTES_VAR_DEREF, Values.BYTES_VAR_SORTED,
+      Values.BYTES_VAR_STRAIGHT);
+
+  private static EnumSet<Values> NUMERICS = EnumSet.of(Values.PACKED_INTS,
+      Values.PACKED_INTS_FIXED, Values.SIMPLE_FLOAT_4BYTE,
+      Values.SIMPLE_FLOAT_8BYTE);
+
+  private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
+      Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
+      Index.NO };
+
+  private OpenBitSet indexValues(IndexWriter w, int numValues, Values value,
+      List<Values> valueVarList, boolean withDeletions, int multOfSeven)
+      throws CorruptIndexException, IOException {
+    final boolean isNumeric = NUMERICS.contains(value);
+    OpenBitSet deleted = new OpenBitSet(numValues);
+    Document doc = new Document();
+    Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)];
+    Fieldable field = random.nextBoolean() ? new ValuesField(value.name())
+        : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random,
+            10), idx == Index.NO ? Store.YES : Store.NO, idx);
+    doc.add(field);
+
+    ValuesAttribute valuesAttribute = ValuesField.values(field);
+    valuesAttribute.setType(value);
+    final LongsRef intsRef = valuesAttribute.ints();
+    final FloatsRef floatsRef = valuesAttribute.floats();
+    final BytesRef bytesRef = valuesAttribute.bytes();
+
+    final String idBase = value.name() + "_";
+    final byte[] b = new byte[multOfSeven];
+    if (bytesRef != null) {
+      bytesRef.bytes = b;
+      bytesRef.length = b.length;
+      bytesRef.offset = 0;
+    }
+    // 
+    byte upto = 0;
+    for (int i = 0; i < numValues; i++) {
+      if (isNumeric) {
+        switch (value) {
+        case PACKED_INTS:
+        case PACKED_INTS_FIXED:
+          intsRef.set(i);
+          break;
+        case SIMPLE_FLOAT_4BYTE:
+        case SIMPLE_FLOAT_8BYTE:
+          floatsRef.set(2.0f * i);
+          break;
+        default:
+          fail("unexpected value " + value);
+        }
+      } else {
+        for (int j = 0; j < b.length; j++) {
+          b[j] = upto++;
+        }
+      }
+      doc.removeFields("id");
+      doc.add(new Field("id", idBase + i, Store.YES,
+          Index.NOT_ANALYZED_NO_NORMS));
+      w.addDocument(doc);
+
+      if (i % 7 == 0) {
+        if (withDeletions && random.nextBoolean()) {
+          Values val = valueVarList.get(random.nextInt(1 + valueVarList
+              .indexOf(value)));
+          final int randInt = val == value ? random.nextInt(1 + i) : random
+              .nextInt(numValues);
+          w.deleteDocuments(new Term("id", val.name() + "_" + randInt));
+          if (val == value) {
+            deleted.set(randInt);
+          }
+        }
+        w.commit();
+
+      }
+    }
+    w.commit();
+
+    // TODO test unoptimized with deletions
+    if (withDeletions || random.nextBoolean())
+      w.optimize();
+    return deleted;
+  }
+
+  public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
+      throws CorruptIndexException, LockObtainFailedException, IOException {
+    Directory d = newDirectory();
+    IndexWriter w = new IndexWriter(d, cfg);
+    final List<Values> byteVariantList = new ArrayList<Values>(BYTES);
+    // run in random order to test if fill works correctly during merges
+    Collections.shuffle(byteVariantList, random);
+    final int numValues = 179 + random.nextInt(151);
+    for (Values byteIndexValue : byteVariantList) {
+      List<Closeable> closeables = new ArrayList<Closeable>();
+
+      int bytesSize = 7 + random.nextInt(128);
+      OpenBitSet deleted = indexValues(w, numValues, byteIndexValue,
+          byteVariantList, withDeletions, bytesSize);
+      final IndexReader r = IndexReader.open(w);
+      assertEquals(0, r.numDeletedDocs());
+      final int numRemainingValues = (int) (numValues - deleted.cardinality());
+      final int base = r.numDocs() - numRemainingValues;
+      DocValues bytesReader = getDocValues(r, byteIndexValue.name());
+      assertNotNull("field " + byteIndexValue.name()
+          + " returned null reader - maybe merged failed", bytesReader);
+      Source bytes = getSource(bytesReader);
+      ValuesEnum bytesEnum = bytesReader.getEnum();
+      assertNotNull(bytesEnum);
+      final ValuesAttribute attr = bytesEnum
+          .addAttribute(ValuesAttribute.class);
+      byte upto = 0;
+      // test the filled up slots for correctness
+      for (int i = 0; i < base; i++) {
+        final BytesRef br = bytes.getBytes(i);
+        String msg = " field: " + byteIndexValue.name() + " at index: " + i
+            + " base: " + base + " numDocs:" + r.numDocs();
+        switch (byteIndexValue) {
+        case BYTES_VAR_STRAIGHT:
+        case BYTES_FIXED_STRAIGHT:
+          assertEquals(i, bytesEnum.advance(i));
+          // fixed straight returns a BytesRef of the fixed length that is
+          // filled with zero bytes
+          assertNotNull("expected none null - " + msg, br);
+          if (br.length != 0) {
+            assertEquals("expected zero bytes of length " + bytesSize + " - "
+                + msg, bytesSize, br.length);
+            for (int j = 0; j < br.length; j++) {
+              assertEquals("Byte at index " + j + " doesn't match - " + msg, 0,
+                  br.bytes[br.offset + j]);
+            }
+          }
+          break;
+        case BYTES_VAR_SORTED:
+        case BYTES_FIXED_SORTED:
+        case BYTES_VAR_DEREF:
+        case BYTES_FIXED_DEREF:
+        default:
+          assertNotNull("expected none null - " + msg, br);
+          if (br.length != 0) {
+            bytes.getBytes(i);
+          }
+          assertEquals("expected empty bytes - " + br.utf8ToString() + msg, 0,
+              br.length);
+        }
+      }
+      final BytesRef enumRef = attr.bytes();
+
+      // test the actual doc values added in this iteration
+      assertEquals(base + numRemainingValues, r.numDocs());
+      int v = 0;
+      for (int i = base; i < r.numDocs(); i++) {
+
+        String msg = " field: " + byteIndexValue.name() + " at index: " + i
+            + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: "
+            + bytesSize;
+        while (withDeletions && deleted.get(v++)) {
+          upto += bytesSize;
+        }
+
+        BytesRef br = bytes.getBytes(i);
+        if (bytesEnum.docID() != i)
+          assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum
+              .advance(i));
+        for (int j = 0; j < br.length; j++, upto++) {
+          assertEquals(
+              "EnumRef Byte at index " + j + " doesn't match - " + msg, upto,
+              enumRef.bytes[enumRef.offset + j]);
+          assertEquals("SourceRef Byte at index " + j + " doesn't match - "
+              + msg, upto, br.bytes[br.offset + j]);
+        }
+      }
+
+      // clean up
+      closeables.add(r);
+      for (Closeable toClose : closeables) {
+        toClose.close();
+      }
+    }
+
+    w.close();
+    d.close();
+  }
+
+  private DocValues getDocValues(IndexReader reader, String field)
+      throws IOException {
+    boolean optimized = reader.isOptimized();
+    Fields fields = optimized ? reader.getSequentialSubReaders()[0].fields()
+        : MultiFields.getFields(reader);
+    switch (random.nextInt(optimized ? 3 : 2)) { // case 2 only if optimized
+    case 0:
+      return fields.docValues(field);
+    case 1:
+      FieldsEnum iterator = fields.iterator();
+      String name;
+      while ((name = iterator.next()) != null) {
+        if (name.equals(field))
+          return iterator.docValues();
+      }
+      throw new RuntimeException("no such field " + field);
+    case 2:// this only works if we are on an optimized index!
+      return reader.getSequentialSubReaders()[0].docValues(field);
+    }
+    throw new RuntimeException();
+  }
+
+  private Source getSource(DocValues values) throws IOException {
+    // getSource uses cache internally
+    return random.nextBoolean() ? values.load() : values.getSource();
+  }
+
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL



Mime
View raw message