lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r770630 - in /lucene/java/branches/lucene_2_4: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Date Fri, 01 May 2009 10:53:59 GMT
Author: mikemccand
Date: Fri May  1 10:53:58 2009
New Revision: 770630

URL: http://svn.apache.org/viewvc?rev=770630&view=rev
Log:
LUCENE-1623: properly handle back-compatibility of indexes containing field names with non-ascii
characters

Modified:
    lucene/java/branches/lucene_2_4/   (props changed)
    lucene/java/branches/lucene_2_4/CHANGES.txt
    lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
    lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/index.23.cfs.zip
    lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/index.23.nocfs.zip

Propchange: lucene/java/branches/lucene_2_4/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri May  1 10:53:58 2009
@@ -1 +1 @@
-/lucene/java/trunk:708549,709456,712233,718540,719716,723149,734415,735043,746661,747251,748534,749326,750162
+/lucene/java/trunk:708549,709456,712233,718540,719716,723149,734415,735043,746661,747251,748534,749326,750162,770625

Modified: lucene/java/branches/lucene_2_4/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/CHANGES.txt?rev=770630&r1=770629&r2=770630&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_4/CHANGES.txt Fri May  1 10:53:58 2009
@@ -9,6 +9,10 @@
    could cause "infinite merging" to happen.  (Christiaan Fluit via
    Mike McCandless)
 
+2. LUCENE-1623: Properly handle back-compatibility of 2.3.x indexes that
+   contain field names with non-ascii characters.  (Mike Streeton via
+   Mike McCandless)
+
 ======================= Release 2.4.1 2009-03-09 =======================
 
 Bug fixes

Modified: lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/index/FieldInfos.java?rev=770630&r1=770629&r2=770630&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/index/FieldInfos.java Fri May  1 10:53:58 2009
@@ -57,7 +57,23 @@
   FieldInfos(Directory d, String name) throws IOException {
     IndexInput input = d.openInput(name);
     try {
-      read(input);
+      try {
+        read(input, name);
+      } catch (IOException ioe) {
+        // LUCENE-1623: this may be 2.3.2 (pre-utf8) or
+        // 2.4.x (utf8) encoding; retry with input set to
+        // pre-utf8
+        input.seek(0);
+        input.setModifiedUTF8StringsMode();
+        byNumber = new ArrayList();
+        byName = new HashMap();
+        try {
+          read(input, name);
+        } catch (Throwable t) {
+          // Ignore any new exception & throw original IOE
+          throw ioe;
+        }
+      }
     } finally {
       input.close();
     }
@@ -307,8 +323,8 @@
     }
   }
 
-  private void read(IndexInput input) throws IOException {
-    int size = input.readVInt();//read in the size
+  private void read(IndexInput input, String fileName) throws IOException {
+    int size = input.readVInt(); //read in the size
     for (int i = 0; i < size; i++) {
       String name = input.readString().intern();
       byte bits = input.readByte();
@@ -321,6 +337,10 @@
       boolean omitTf = (bits & OMIT_TF) != 0;
       
       addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector,
omitNorms, storePayloads, omitTf);
+    }
+
+    if (input.getFilePointer() != input.length()) {
+      throw new CorruptIndexException("did not read all bytes from file \"" + fileName +
"\": read " + input.getFilePointer() + " vs size " + input.length());
     }    
   }
 

Modified: lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=770630&r1=770629&r2=770630&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Fri May  1 10:53:58 2009
@@ -203,8 +203,12 @@
       if (!reader.isDeleted(i)) {
         Document d = reader.document(i);
         List fields = d.getFields();
-        if (oldName.startsWith("23.")) {
-          assertEquals(4, fields.size());
+        if (!oldName.startsWith("19.") &&
+            !oldName.startsWith("20.") &&
+            !oldName.startsWith("21.") &&
+            !oldName.startsWith("22.")) {
+          // Test on indices >= 2.3
+          assertEquals(5, fields.size());
           Field f = (Field) d.getField("id");
           assertEquals(""+i, f.stringValue());
 
@@ -216,7 +220,10 @@
         
           f = (Field) d.getField("content2");
           assertEquals("here is more content with aaa aaa aaa", f.stringValue());
-        }        
+
+          f = (Field) d.getField("fie\u2C77ld");
+          assertEquals("field with non-ascii name", f.stringValue());
+        }       
       } else
         // Only ID 7 is deleted
         assertEquals(7, i);
@@ -479,6 +486,7 @@
     doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd",
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
     doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd",
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
     doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES,
Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS));
     writer.addDocument(doc);
   }
 

Modified: lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/index.23.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/index.23.cfs.zip?rev=770630&r1=770629&r2=770630&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/index.23.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/index/index.23.nocfs.zip?rev=770630&r1=770629&r2=770630&view=diff
==============================================================================
Binary files - no diff available.



Mime
View raw message