lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r1029374 - in /lucene/java/branches/lucene_3_0/contrib: ./ analyzers/common/src/java/org/apache/lucene/analysis/compound/
Date Sun, 31 Oct 2010 14:46:37 GMT
Author: uschindler
Date: Sun Oct 31 14:46:37 2010
New Revision: 1029374

URL: http://svn.apache.org/viewvc?rev=1029374&view=rev
Log:
Bugfix-only backport: LUCENE-2731, LUCENE-2732: Fix (charset) problems in XML loading in HyphenationCompoundWordTokenFilter

Modified:
    lucene/java/branches/lucene_3_0/contrib/CHANGES.txt
    lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
    lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java

Modified: lucene/java/branches/lucene_3_0/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/contrib/CHANGES.txt?rev=1029374&r1=1029373&r2=1029374&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_3_0/contrib/CHANGES.txt Sun Oct 31 14:46:37 2010
@@ -18,6 +18,11 @@ Bug Fixes
 
  * LUCENE-2616: FastVectorHighlighter: out of alignment when the first value is
    empty in multiValued field (Koji Sekiguchi)
+   
+ * LUCENE-2731, LUCENE-2732: Fix (charset) problems in XML loading in
+   HyphenationCompoundWordTokenFilter (partial, bugfix-only change in 2.9 and
+   3.0; the full fix will follow in 3.1).
+   (Uwe Schindler)
 
 Documentation
 

Modified: lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=1029374&r1=1029373&r2=1029374&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Sun Oct 31 14:46:37 2010
@@ -22,6 +22,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.Locale;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CharArraySet;
@@ -170,7 +171,7 @@ public abstract class CompoundWordTokenF
     Iterator iter=col.iterator();
     
     while (iter.hasNext()) {
-      target.add(((String)iter.next()).toLowerCase());
+      target.add(((String)iter.next()).toLowerCase(Locale.ENGLISH));
     }
   }
   

Modified: lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java?rev=1029374&r1=1029373&r2=1029374&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (original)
+++ lucene/java/branches/lucene_3_0/contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java Sun Oct 31 14:46:37 2010
@@ -18,8 +18,6 @@ package org.apache.lucene.analysis.compo
  */
 
 import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.Set;
 
@@ -117,7 +115,7 @@ public class HyphenationCompoundWordToke
    */
   public static HyphenationTree getHyphenationTree(String hyphenationFilename)
       throws Exception {
-    return getHyphenationTree(new File(hyphenationFilename));
+    return getHyphenationTree(new InputSource(hyphenationFilename));
   }
 
   /**
@@ -129,8 +127,7 @@ public class HyphenationCompoundWordToke
    */
   public static HyphenationTree getHyphenationTree(File hyphenationFile)
       throws Exception {
-    return getHyphenationTree(new InputStreamReader(new FileInputStream(
-        hyphenationFile), "ISO-8859-1"));
+    return getHyphenationTree(new InputSource(hyphenationFile.toURL().toExternalForm()));
   }
 
   /**
@@ -142,10 +139,25 @@ public class HyphenationCompoundWordToke
    */
   public static HyphenationTree getHyphenationTree(Reader hyphenationReader)
       throws Exception {
-    HyphenationTree tree = new HyphenationTree();
-
-    tree.loadPatterns(new InputSource(hyphenationReader));
+    final InputSource is = new InputSource(hyphenationReader);
+    // we need this to load the DTD in very old parsers (like the one in JDK 1.4).
+    // The DTD itself is provided via EntityResolver, so it should always load, but
+    // some parsers still want to have a base URL (Crimson).
+    is.setSystemId("urn:java:" + HyphenationTree.class.getName());
+    return getHyphenationTree(is);
+  }
 
+  /**
+   * Create a hyphenator tree
+   * 
+   * @param hyphenationSource the InputSource pointing to the XML grammar
+   * @return An object representing the hyphenation patterns
+   * @throws Exception
+   */
+  public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
+      throws Exception {
+    HyphenationTree tree = new HyphenationTree();
+    tree.loadPatterns(hyphenationSource);
     return tree;
   }
 



Mime
View raw message