lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r567338 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/DocumentsWriter.java src/java/org/apache/lucene/index/IndexWriter.java src/test/org/apache/lucene/index/TestIndexWriter.java
Date Sat, 18 Aug 2007 23:15:15 GMT
Author: mikemccand
Date: Sat Aug 18 16:15:14 2007
New Revision: 567338

URL: http://svn.apache.org/viewvc?view=rev&rev=567338
Log:
LUCENE-985: throw clearer exception when term is too long (> 16383 chars)

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=567338&r1=567337&r2=567338
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat Aug 18 16:15:14 2007
@@ -76,6 +76,11 @@
     close any streams they had opened if an exception is hit in the
     constructor.  (Ning Li via Mike McCandless)
 
+12. LUCENE-985: If an extremely long term is in a doc (> 16383 chars),
+    we now throw an IllegalArgumentException saying the term is too
+    long, instead of cryptic ArrayIndexOutOfBoundsException.  (Karl
+    Wettin via Mike McCandless)
+
 New features
 
  1. LUCENE-906: Elision filter for French.

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?view=diff&rev=567338&r1=567337&r2=567338
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Sat Aug 18 16:15:14 2007
@@ -1452,8 +1452,11 @@
           p = postingsFreeList[--postingsFreeCount];
 
           final int textLen1 = 1+tokenTextLen;
-          if (textLen1 + charPool.byteUpto > CHAR_BLOCK_SIZE)
+          if (textLen1 + charPool.byteUpto > CHAR_BLOCK_SIZE) {
+            if (textLen1 > CHAR_BLOCK_SIZE)
+              throw new IllegalArgumentException("term length " + tokenTextLen + " exceeds max term length " + (CHAR_BLOCK_SIZE-1));
             charPool.nextBuffer();
+          }
           final char[] text = charPool.buffer;
           final int textUpto = charPool.byteUpto;
           p.textStart = textUpto + charPool.byteOffset;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&rev=567338&r1=567337&r2=567338
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Sat Aug 18 16:15:14 2007
@@ -1091,6 +1091,10 @@
    * temporary space usage) then the maximum free disk space
    * required is the same as {@link #optimize}.</p>
    *
+   * <p>Note that each term in the document can be no longer
+   * than 16383 characters, otherwise an
+   * IllegalArgumentException will be thrown.</p>
+   *
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?view=diff&rev=567338&r1=567337&r2=567338
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Sat Aug 18 16:15:14 2007
@@ -25,6 +25,7 @@
 import junit.framework.TestCase;
 
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -488,6 +489,28 @@
       if (!Arrays.equals(startFiles, endFiles)) {
         fail(message + ": before delete:\n    " + arrayToString(startFiles) + "\n  after delete:\n    " + arrayToString(endFiles));
       }
+    }
+
+    /**
+     * Make sure we get a friendly exception for a wicked
+     * long term.
+    */
+    public void testWickedLongTerm() throws IOException {
+      RAMDirectory dir = new RAMDirectory();
+      IndexWriter writer  = new IndexWriter(dir, new StandardAnalyzer(), true);
+
+      char[] chars = new char[16384];
+      Arrays.fill(chars, 'x');
+      Document doc = new Document();
+      String contents = "a b c " + new String(chars);
+      doc.add(new Field("content", contents, Field.Store.NO, Field.Index.TOKENIZED));
+      try {
+        writer.addDocument(doc);
+        fail("did not hit expected exception");
+      } catch (IllegalArgumentException e) {
+      }
+      writer.close();
+      dir.close();
     }
 
     /**



Mime
View raw message