lucene-java-commits mailing list archives

From uschind...@apache.org
Subject svn commit: r829972 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Date Mon, 26 Oct 2009 21:16:57 GMT
Author: uschindler
Date: Mon Oct 26 21:16:57 2009
New Revision: 829972

URL: http://svn.apache.org/viewvc?rev=829972&view=rev
Log:
LUCENE-1960: Add support for reading Field.Store.COMPRESS fields again and decompress on merge/optimize.
Also convert to a new stored file version number.

Added:
    lucene/java/trunk/src/test/org/apache/lucene/index/index.29.cfs.zip   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/index/index.29.nocfs.zip   (with props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=829972&r1=829971&r2=829972&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Oct 26 21:16:57 2009
@@ -15,6 +15,25 @@
 * LUCENE-1677: Remove the system property to set SegmentReader class
   implementation.  (Uwe Schindler)
 
+* LUCENE-1960: As a consequence of the removal of Field.Store.COMPRESS,
+  support for this type of field was removed. Lucene 3.0 is still able
+  to read indexes with compressed fields, but as soon as merges occur
+  or the index is optimized, all compressed fields are decompressed
+  and converted to Field.Store.YES. Because of this, indexes with
+  compressed fields can suddenly get larger. Also, the first merge
+  involving decompression cannot be done in raw mode and is therefore
+  slower. This change has no effect on code that uses such old indexes;
+  they behave as before (fields are automatically decompressed during
+  read). Indexes converted to the Lucene 3.0 format can no longer be
+  read by previous versions.
+  It is recommended to optimize your indexes after upgrading to convert
+  them to the new format and decompress all fields.
+  If you still want compressed fields, you can use CompressionTools,
+  which creates a compressed byte[] to be added as a binary stored
+  field. This cannot be done automatically, as you also have to
+  decompress such fields when reading; switching to this approach
+  requires reindexing.
+  (Michael Busch, Uwe Schindler)
+
 API Changes
 
 * LUCENE-1257, LUCENE-1984, LUCENE-1985, ...: Port to Java 1.5 [not yet finished].

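The CHANGES entry above recommends CompressionTools for applications that still want compressed stored fields. A minimal sketch of that manual approach, assuming an already-open IndexWriter/IndexReader and an illustrative field name "body" (none of this code is part of the commit itself):

    import org.apache.lucene.document.CompressionTools;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    // indexing: compress the value up front and store it as an ordinary binary field
    Document doc = new Document();
    byte[] compressedBody = CompressionTools.compressString(bodyText);
    doc.add(new Field("body", compressedBody, Field.Store.YES));
    writer.addDocument(doc);

    // reading: the application must decompress the stored bytes itself;
    // decompressString throws java.util.zip.DataFormatException for non-compressed data
    Document stored = reader.document(docId);
    String restored = CompressionTools.decompressString(stored.getBinaryValue("body"));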
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=829972&r1=829971&r2=829972&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Mon Oct 26 21:16:57 2009
@@ -19,6 +19,7 @@
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.AbstractField;
+import org.apache.lucene.document.CompressionTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldSelector;
@@ -32,6 +33,7 @@
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.zip.DataFormatException;
 
 /**
  * Class responsible for access to stored document fields.
@@ -203,7 +205,11 @@
   }
 
   boolean canReadRawDocs() {
-    return format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+    // Disable reading raw docs in 2.x format, because of the removal of compressed
+    // fields in 3.0. We don't want rawDocs() to decode field bits to figure out
+    // if a field was compressed, hence we enforce ordinary (non-raw) stored field merges
+    // for <3.0 indexes.
+    return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
   }
 
  final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
@@ -219,31 +225,34 @@
       FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
       
       byte bits = fieldsStream.readByte();
-      assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
+      assert bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
 
+      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
+      assert (compressed ? (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true)
+        : "compressed fields are only allowed in indexes of version <= 2.9";
       boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
       boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
       //TODO: Find an alternative approach here if this list continues to grow beyond the
       //list of 5 or 6 currently here.  See Lucene 762 for discussion
       if (acceptField.equals(FieldSelectorResult.LOAD)) {
-        addField(doc, fi, binary, tokenize);
+        addField(doc, fi, binary, compressed, tokenize);
       }
       else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
-        addField(doc, fi, binary, tokenize);
+        addField(doc, fi, binary, compressed, tokenize);
         break;//Get out of this loop
       }
       else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
-        addFieldLazy(doc, fi, binary, tokenize);
+        addFieldLazy(doc, fi, binary, compressed, tokenize);
       }
       else if (acceptField.equals(FieldSelectorResult.SIZE)){
-        skipField(binary, addFieldSize(doc, fi, binary));
+        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
       }
       else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
-        addFieldSize(doc, fi, binary);
+        addFieldSize(doc, fi, binary, compressed);
         break;
       }
       else {
-        skipField(binary);
+        skipField(binary, compressed);
       }
     }
 
@@ -280,12 +289,12 @@
    * Skip the field.  We still have to read some of the information about the field, but can skip past the actual content.
    * This will have the most payoff on large fields.
    */
-  private void skipField(boolean binary) throws IOException {
-    skipField(binary, fieldsStream.readVInt());
+  private void skipField(boolean binary, boolean compressed) throws IOException {
+    skipField(binary, compressed, fieldsStream.readVInt());
   }
   
-  private void skipField(boolean binary, int toRead) throws IOException {
-   if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary) {
+  private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
+   if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
      fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
    } else {
      // We need to skip chars.  This will slow us down, but still better
@@ -293,12 +302,12 @@
    }
   }
 
-  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
+  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
     if (binary) {
       int toRead = fieldsStream.readVInt();
       long pointer = fieldsStream.getFilePointer();
       //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
-      doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
+      doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed));
       //Need to move the pointer ahead by toRead positions
       fieldsStream.seek(pointer + toRead);
     } else {
@@ -307,43 +316,75 @@
       Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
 
       AbstractField f;
-      int length = fieldsStream.readVInt();
-      long pointer = fieldsStream.getFilePointer();
-      //Skip ahead of where we are by the length of what is stored
-      if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
-        fieldsStream.seek(pointer+length);
-      else
-        fieldsStream.skipChars(length);
-      f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
-      f.setOmitNorms(fi.omitNorms);
-      f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+      if (compressed) {
+        int toRead = fieldsStream.readVInt();
+        long pointer = fieldsStream.getFilePointer();
+        f = new LazyField(fi.name, store, toRead, pointer, binary, compressed);
+        //skip over the part that we aren't loading
+        fieldsStream.seek(pointer + toRead);
+        f.setOmitNorms(fi.omitNorms);
+        f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+      } else {
+        int length = fieldsStream.readVInt();
+        long pointer = fieldsStream.getFilePointer();
+        //Skip ahead of where we are by the length of what is stored
+        if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
+          fieldsStream.seek(pointer+length);
+        } else {
+          fieldsStream.skipChars(length);
+        }
+        f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, compressed);
+        f.setOmitNorms(fi.omitNorms);
+        f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+      }
+      
       doc.add(f);
     }
 
   }
 
-  private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
+  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {
 
     //we have a binary stored field, and it may be compressed
     if (binary) {
       int toRead = fieldsStream.readVInt();
       final byte[] b = new byte[toRead];
       fieldsStream.readBytes(b, 0, b.length);
-      doc.add(new Field(fi.name, b, Field.Store.YES));
+      if (compressed) {
+        doc.add(new Field(fi.name, uncompress(b), Field.Store.YES));
+      } else {
+        doc.add(new Field(fi.name, b, Field.Store.YES));
+      }
     } else {
       Field.Store store = Field.Store.YES;
       Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
       Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
 
       AbstractField f;
-      f = new Field(fi.name,     // name
-    		false,
-              fieldsStream.readString(), // read value
-              store,
-              index,
-              termVector);
-      f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-      f.setOmitNorms(fi.omitNorms);
+      if (compressed) {
+        int toRead = fieldsStream.readVInt();
+
+        final byte[] b = new byte[toRead];
+        fieldsStream.readBytes(b, 0, b.length);
+        f = new Field(fi.name,      // field name
+                false,
+                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
+                store,
+                index,
+                termVector);
+        f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+        f.setOmitNorms(fi.omitNorms);
+      } else {
+        f = new Field(fi.name,     // name
+         false,
+                fieldsStream.readString(), // read value
+                store,
+                index,
+                termVector);
+        f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+        f.setOmitNorms(fi.omitNorms);
+      }
+      
       doc.add(f);
     }
   }
@@ -351,8 +392,8 @@
   // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
   // Read just the size -- caller must skip the field content to continue reading fields
   // Return the size in bytes or chars, depending on field type
-  private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
-    int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
+  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
+    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
     byte[] sizebytes = new byte[4];
     sizebytes[0] = (byte) (bytesize>>>24);
     sizebytes[1] = (byte) (bytesize>>>16);
@@ -369,8 +410,10 @@
   private class LazyField extends AbstractField implements Fieldable {
     private int toRead;
     private long pointer;
+    /** @deprecated Only kept for backward-compatibility with <3.0 indexes. Will be removed in 4.0. */
+    private boolean isCompressed;
 
-    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
+    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
       super(name, store, Field.Index.NO, Field.TermVector.NO);
       this.toRead = toRead;
       this.pointer = pointer;
@@ -378,9 +421,10 @@
       if (isBinary)
         binaryLength = toRead;
       lazy = true;
+      this.isCompressed = isCompressed;
     }
 
-    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
+    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
       super(name, store, index, termVector);
       this.toRead = toRead;
       this.pointer = pointer;
@@ -388,6 +432,7 @@
       if (isBinary)
         binaryLength = toRead;
       lazy = true;
+      this.isCompressed = isCompressed;
     }
 
     private IndexInput getFieldStream() {
@@ -427,15 +472,21 @@
           IndexInput localFieldsStream = getFieldStream();
           try {
             localFieldsStream.seek(pointer);
-            if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
-              byte[] bytes = new byte[toRead];
-              localFieldsStream.readBytes(bytes, 0, toRead);
-              fieldsData = new String(bytes, "UTF-8");
+            if (isCompressed) {
+              final byte[] b = new byte[toRead];
+              localFieldsStream.readBytes(b, 0, b.length);
+              fieldsData = new String(uncompress(b), "UTF-8");
             } else {
-              //read in chars b/c we already know the length we need to read
-              char[] chars = new char[toRead];
-              localFieldsStream.readChars(chars, 0, toRead);
-              fieldsData = new String(chars);
+              if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
+                byte[] bytes = new byte[toRead];
+                localFieldsStream.readBytes(bytes, 0, toRead);
+                fieldsData = new String(bytes, "UTF-8");
+              } else {
+                //read in chars b/c we already know the length we need to read
+                char[] chars = new char[toRead];
+                localFieldsStream.readChars(chars, 0, toRead);
+                fieldsData = new String(chars);
+              }
             }
           } catch (IOException e) {
             throw new FieldReaderException(e);
@@ -484,7 +535,11 @@
           try {
             localFieldsStream.seek(pointer);
             localFieldsStream.readBytes(b, 0, toRead);
-            fieldsData = b;
+            if (isCompressed == true) {
+              fieldsData = uncompress(b);
+            } else {
+              fieldsData = b;
+            }
           } catch (IOException e) {
             throw new FieldReaderException(e);
           }
@@ -498,4 +553,16 @@
         return null;     
     }
   }
+
+  private byte[] uncompress(byte[] b)
+          throws CorruptIndexException {
+    try {
+      return CompressionTools.decompress(b);
+    } catch (DataFormatException e) {
+      // this will happen if the field is not compressed
+      CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
+      newException.initCause(e);
+      throw newException;
+    }
+  }
 }

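For readers following the FieldsReader diff above: each stored field begins with a one-byte bitmask, and the reader now also inspects a compression bit that only pre-3.0 segments are allowed to set. A small sketch of that decoding, using the flag constants from FieldsWriter (variable names are illustrative):

    byte bits = fieldsStream.readByte();                                   // per-field flags
    boolean tokenize   = (bits & FieldsWriter.FIELD_IS_TOKENIZED)  != 0;   // 0x1
    boolean binary     = (bits & FieldsWriter.FIELD_IS_BINARY)     != 0;   // 0x2
    boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;   // 0x4, legacy only
    // segments written in the 3.0 format must never set the compressed bit
    assert !compressed || format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;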
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java?rev=829972&r1=829971&r2=829972&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsWriter.java Mon Oct 26 21:16:57 2009
@@ -30,17 +30,23 @@
 {
   static final byte FIELD_IS_TOKENIZED = 0x1;
   static final byte FIELD_IS_BINARY = 0x2;
+  
+  /** @deprecated Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0 */
+  static final byte FIELD_IS_COMPRESSED = 0x4;
 
   // Original format
   static final int FORMAT = 0;
 
   // Changed strings to UTF8
   static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;
+  
+  // Lucene 3.0: Removal of compressed fields
+  static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
 
   // NOTE: if you introduce a new format, make it 1 higher
   // than the current one, and always change this if you
   // switch to a new format!
-  static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+  static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
   
     private FieldInfos fieldInfos;
 

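The new format constant is what FieldsReader.canReadRawDocs() keys on: raw, byte-for-byte copying of stored fields during a merge is only permitted for segments that are guaranteed to contain no compressed fields. A sketch of that gate, mirroring the check added above:

    // FORMAT = 0                                   original stored-fields format
    // FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1      string lengths recorded in UTF-8 bytes
    // FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2   compressed fields no longer written
    boolean canCopyRaw = format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
    // older segments fall back to ordinary stored-field merging, which is where
    // legacy compressed fields get decompressed and rewritten as Field.Store.YES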
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=829972&r1=829971&r2=829972&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Mon Oct 26 21:16:57 2009
@@ -22,21 +22,28 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.List;
+import java.util.ArrayList;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
@@ -127,22 +134,105 @@
                              "23.nocfs",
                              "24.cfs",
                              "24.nocfs",
+                             "29.cfs",
+                             "29.nocfs",
   };
+  
+  private void assertCompressedFields29(Directory dir, boolean shouldStillBeCompressed) throws IOException {
+    int count = 0;
+    final int TEXT_PLAIN_LENGTH = TEXT_TO_COMPRESS.length() * 2;
+    // FieldSelectorResult.SIZE returns 2*number_of_chars for String fields:
+    final int BINARY_PLAIN_LENGTH = BINARY_TO_COMPRESS.length;
+    
+    IndexReader reader = IndexReader.open(dir, true);
+    try {
+      // look into sub readers and check if raw merge is on/off
+      List<IndexReader> readers = new ArrayList<IndexReader>();
+      ReaderUtil.gatherSubReaders(readers, reader);
+      for (IndexReader ir : readers) {
+        final FieldsReader fr = ((SegmentReader) ir).getFieldsReader();
+        assertTrue("for a 2.9 index, FieldsReader.canReadRawDocs() must be false and other way round for a trunk index",
+          shouldStillBeCompressed != fr.canReadRawDocs());
+      }
+    
+      // test that decompression works correctly
+      for(int i=0; i<reader.maxDoc(); i++) {
+        if (!reader.isDeleted(i)) {
+          Document d = reader.document(i);
+          if (d.get("content3") != null) continue;
+          count++;
+          Fieldable compressed = d.getFieldable("compressed");
+          if (Integer.parseInt(d.get("id")) % 2 == 0) {
+            assertFalse(compressed.isBinary());
+            assertEquals("incorrectly decompressed string", TEXT_TO_COMPRESS, compressed.stringValue());
+          } else {
+            assertTrue(compressed.isBinary());
+            assertTrue("incorrectly decompressed binary", Arrays.equals(BINARY_TO_COMPRESS, compressed.getBinaryValue()));
+          }
+        }
+      }
+      
+      // check if field was decompressed after optimize
+      for(int i=0; i<reader.maxDoc(); i++) {
+        if (!reader.isDeleted(i)) {
+          Document d = reader.document(i, new FieldSelector() {
+            public FieldSelectorResult accept(String fieldName) {
+              return ("compressed".equals(fieldName)) ? FieldSelectorResult.SIZE : FieldSelectorResult.LOAD;
+            }
+          });
+          if (d.get("content3") != null) continue;
+          count++;
+          // read the size from the binary value using DataInputStream (this prevents us from doing the shift ops ourselves):
+          final DataInputStream ds = new DataInputStream(new ByteArrayInputStream(d.getFieldable("compressed").getBinaryValue()));
+          final int actualSize = ds.readInt();
+          ds.close();
+          final int compressedSize = Integer.parseInt(d.get("compressedSize"));
+          final boolean binary = Integer.parseInt(d.get("id")) % 2 > 0;
+          final int shouldSize = shouldStillBeCompressed ?
+            compressedSize :
+            (binary ? BINARY_PLAIN_LENGTH : TEXT_PLAIN_LENGTH);
+          assertEquals("size incorrect", shouldSize, actualSize);
+          if (!shouldStillBeCompressed) {
+            assertFalse("uncompressed field should have another size than recorded in index", compressedSize == actualSize);
+          }
+        }
+      }
+      assertEquals("correct number of tests", 34 * 2, count);
+    } finally {
+      reader.close();
+    }
+  }
 
   public void testOptimizeOldIndex() throws IOException {
+    int hasTested29 = 0;
+    
     for(int i=0;i<oldNames.length;i++) {
       String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
       unzip(dirName, oldNames[i]);
       String fullPath = fullDir(oldNames[i]);
       Directory dir = FSDirectory.open(new File(fullPath));
+
+      if (oldNames[i].startsWith("29.")) {
+        assertCompressedFields29(dir, true);
+        hasTested29++;
+      }
+
       IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
       w.optimize();
       w.close();
 
       _TestUtil.checkIndex(dir);
+      
+      if (oldNames[i].startsWith("29.")) {
+        assertCompressedFields29(dir, false);
+        hasTested29++;
+      }
+
       dir.close();
       rmDir(oldNames[i]);
     }
+    
+    assertEquals("test for compressed field should have run 4 times", 4, hasTested29);
   }
 
   public void testSearchOldIndex() throws IOException {
@@ -203,7 +293,8 @@
             !oldName.startsWith("22.")) {
 
           if (d.getField("content3") == null) {
-            assertEquals(5, fields.size());
+            final int numFields = oldName.startsWith("29.") ? 7 : 5;
+            assertEquals(numFields, fields.size());
             Field f = (Field) d.getField("id");
             assertEquals(""+i, f.stringValue());
 
@@ -497,6 +588,15 @@
     doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
     doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
     doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    /* This was used in 2.9 to generate an index with compressed field:
+    if (id % 2 == 0) {
+      doc.add(new Field("compressed", TEXT_TO_COMPRESS, Field.Store.COMPRESS, Field.Index.NOT_ANALYZED));
+      doc.add(new Field("compressedSize", Integer.toString(TEXT_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
+    } else {
+      doc.add(new Field("compressed", BINARY_TO_COMPRESS, Field.Store.COMPRESS));    
+      doc.add(new Field("compressedSize", Integer.toString(BINARY_COMPRESSED_LENGTH), Field.Store.YES, Field.Index.NOT_ANALYZED));
+    }
+    */
     writer.addDocument(doc);
   }
 
@@ -527,4 +627,22 @@
   public static String fullDir(String dirName) throws IOException {
     return new File(System.getProperty("tempDir"), dirName).getCanonicalPath();
   }
+
+  static final String TEXT_TO_COMPRESS = "this is a compressed field and should appear in 3.0 as an uncompressed field after merge";
+  // FieldSelectorResult.SIZE returns compressed size for compressed fields, which are internally handled as binary;
+  // do it in the same way as FieldsWriter; do not use CompressionTools.compressString() for compressed fields:
+  /* This was used in 2.9 to generate an index with compressed field:
+  static final int TEXT_COMPRESSED_LENGTH;
+  static {
+    try {
+      TEXT_COMPRESSED_LENGTH = CompressionTools.compress(TEXT_TO_COMPRESS.getBytes("UTF-8")).length;
+    } catch (Exception e) {
+      throw new RuntimeException();
+    }
+  }
+  */
+  static final byte[] BINARY_TO_COMPRESS = new byte[]{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
+  /* This was used in 2.9 to generate an index with compressed field:
+  static final int BINARY_COMPRESSED_LENGTH = CompressionTools.compress(BINARY_TO_COMPRESS).length;
+  */
 }

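The test above exercises the recommended upgrade path end-to-end: open the old index with an IndexWriter and optimize it, which rewrites every segment in the new format and decompresses the legacy fields. The same steps, sketched for an application upgrading its own index (the WhitespaceAnalyzer is simply what the test uses; substitute whatever analyzer the index was built with):

    IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(),
                                    IndexWriter.MaxFieldLength.LIMITED);
    w.optimize();  // merges all segments; compressed fields become plain Field.Store.YES
    w.close();
    // the index is now in the 3.0 stored-fields format and can no longer be
    // read by earlier Lucene versions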
Added: lucene/java/trunk/src/test/org/apache/lucene/index/index.29.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/index.29.cfs.zip?rev=829972&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/index.29.cfs.zip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/java/trunk/src/test/org/apache/lucene/index/index.29.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/index.29.nocfs.zip?rev=829972&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/index.29.nocfs.zip
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


