lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1076017 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/ lucene/contrib/icu/src/java/org/apache/lucene/collation/ lucene/contrib/icu/src/test/org/apache/lucene/collation/ lucene/src/java/org/apache/lucene/collation/ lucene/s...
Date Tue, 01 Mar 2011 20:49:04 GMT
Author: rmuir
Date: Tue Mar  1 20:49:03 2011
New Revision: 1076017

URL: http://svn.apache.org/viewvc?rev=1076017&view=rev
Log:
LUCENE-2943: fix thread-safety issues with ICU collation

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/CollationTestBase.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1076017&r1=1076016&r2=1076017&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Tue Mar  1 20:49:03 2011
@@ -105,6 +105,9 @@ Bug fixes
 
  * LUCENE-2874: Highlighting overlapping tokens outputted doubled words.
    (Pierre Gossé via Robert Muir)
+
+ * LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter. 
+   (Robert Muir)
    
 API Changes
 

Modified: lucene/dev/branches/branch_3x/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=1076017&r1=1076016&r2=1076017&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Tue Mar  1 20:49:03 2011
@@ -81,7 +81,12 @@ public final class ICUCollationKeyFilter
    */
   public ICUCollationKeyFilter(TokenStream input, Collator collator) {
     super(input);
-    this.collator = collator;
+    // clone the collator: see http://userguide.icu-project.org/collation/architecture
+    try {
+      this.collator = (Collator) collator.clone();
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   @Override

Modified: lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java?rev=1076017&r1=1076016&r2=1076017&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java Tue Mar  1 20:49:03 2011
@@ -76,4 +76,14 @@ public class TestICUCollationKeyAnalyzer
     (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, 
      "BFJHD", "ECAGI", "BJDFH", "BJDHF");
   }
+  
+  public void testThreadSafe() throws Exception {
+    int iters = 20 * RANDOM_MULTIPLIER;
+    for (int i = 0; i < iters; i++) {
+      Locale locale = randomLocale(random);
+      Collator collator = Collator.getInstance(locale);
+      collator.setStrength(Collator.IDENTICAL);
+      assertThreadSafe(new ICUCollationKeyAnalyzer(collator));
+    }
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java?rev=1076017&r1=1076016&r2=1076017&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/collation/CollationKeyFilter.java Tue Mar  1 20:49:03 2011
@@ -82,7 +82,9 @@ public final class CollationKeyFilter ex
    */
   public CollationKeyFilter(TokenStream input, Collator collator) {
     super(input);
-    this.collator = collator;
+    // clone in case JRE doesn't properly sync,
+    // or to reduce contention in case they do
+    this.collator = (Collator) collator.clone();
   }
 
   @Override

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/CollationTestBase.java?rev=1076017&r1=1076016&r2=1076017&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/CollationTestBase.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/CollationTestBase.java Tue Mar  1 20:49:03 2011
@@ -21,6 +21,8 @@ package org.apache.lucene.collation;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -39,8 +41,12 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 
 import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
 
 public abstract class CollationTestBase extends LuceneTestCase {
 
@@ -259,4 +265,73 @@ public abstract class CollationTestBase 
     }
     assertEquals(expectedResult, buff.toString());
   }
+  
+  private String randomString() {
+    // ideally we could do this!
+    // return _TestUtil.randomUnicodeString(random);
+    //
+    // http://bugs.icu-project.org/trac/ticket/8060
+    // http://bugs.icu-project.org/trac/ticket/7732
+    // ...
+    // 
+    // as a workaround, just test the BMP for now (and avoid 0xFFFF etc)
+    int length = _TestUtil.nextInt(random, 0, 10);
+    char chars[] = new char[length];
+    for (int i = 0; i < length; i++) {
+      if (random.nextBoolean()) {
+        chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF);
+      } else {
+        chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD);
+      }
+    }
+    return new String(chars, 0, length);
+  }
+
+  public void assertThreadSafe(final Analyzer analyzer) throws Exception {
+    int numTestPoints = 1000;
+    int numThreads = _TestUtil.nextInt(random, 3, 5);
+    final HashMap<String,String> map = new HashMap<String,String>();
+    
+    // create a map<String,SortKey> up front.
+    // then with multiple threads, generate sort keys for all the keys in the map
+    // and ensure they are the same as the ones we produced in serial fashion.
+
+    for (int i = 0; i < numTestPoints; i++) {
+      String term = randomString();
+      TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
+      CharTermAttribute encodedBytes = ts.addAttribute(CharTermAttribute.class);
+      ts.reset();
+      assertTrue(ts.incrementToken());
+      // ensure we make a copy of the actual bytes too
+      map.put(term, encodedBytes.toString());
+    }
+    
+    Thread threads[] = new Thread[numThreads];
+    for (int i = 0; i < numThreads; i++) {
+      threads[i] = new Thread() {
+        @Override
+        public void run() {
+          try {
+            for (Map.Entry<String,String> mapping : map.entrySet()) {
+              String term = mapping.getKey();
+              String expected = mapping.getValue();
+              TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
+              CharTermAttribute encodedBytes = ts.addAttribute(CharTermAttribute.class);
+              ts.reset();
+              assertTrue(ts.incrementToken());
+              assertEquals(expected, encodedBytes.toString());
+            }
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+    }
+    for (int i = 0; i < numThreads; i++) {
+      threads[i].start();
+    }
+    for (int i = 0; i < numThreads; i++) {
+      threads[i].join();
+    }
+  }
 }

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java?rev=1076017&r1=1076016&r2=1076017&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java Tue Mar  1 20:49:03 2011
@@ -79,4 +79,14 @@ public class TestCollationKeyAnalyzer ex
     (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, 
      oStrokeFirst ? "BFJHD" : "BFJDH", "EACGI", "BJDFH", "BJDHF");
   }
+  
+  public void testThreadSafe() throws Exception {
+    int iters = 20 * RANDOM_MULTIPLIER;
+    for (int i = 0; i < iters; i++) {
+      Locale locale = randomLocale(random);
+      Collator collator = Collator.getInstance(locale);
+      collator.setStrength(Collator.PRIMARY);
+      assertThreadSafe(new CollationKeyAnalyzer(collator));
+    }
+  }
 }



Mime
View raw message