lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r964430 - in /lucene/dev/trunk/solr: CHANGES.txt src/java/org/apache/solr/core/SolrResourceLoader.java src/test/org/apache/solr/core/ResourceLoaderTest.java src/test/test-files/solr/conf/stopwordsWrongEncoding.txt
Date Thu, 15 Jul 2010 13:50:48 GMT
Author: rmuir
Date: Thu Jul 15 13:50:48 2010
New Revision: 964430

URL: http://svn.apache.org/viewvc?rev=964430&view=rev
Log:
SOLR-2003: report (throw exception) rather than replace charset errors in SolrResourceLoader.getLines

Added:
    lucene/dev/trunk/solr/src/test/test-files/solr/conf/stopwordsWrongEncoding.txt   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/core/ResourceLoaderTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=964430&r1=964429&r2=964430&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Jul 15 13:50:48 2010
@@ -467,6 +467,9 @@ Other Changes
 * SOLR-1946: Misc improvements to the SystemInfoHandler: /admin/system
   (hossman)
 
+* SOLR-2003: SolrResourceLoader will report any encoding errors, rather than
+  silently using replacement characters for invalid inputs (blargy via rmuir)
+
 Build
 ----------------------
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java?rev=964430&r1=964429&r2=964430&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java Thu Jul 15 13:50:48 2010
@@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentHa
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
 import java.lang.reflect.Constructor;
 
 import javax.naming.Context;
@@ -316,7 +317,9 @@ public class SolrResourceLoader implemen
     ArrayList<String> lines;
     try {
       input = new BufferedReader(new InputStreamReader(openResource(resource),
-          charset));
+          charset.newDecoder()
+          .onMalformedInput(CodingErrorAction.REPORT)
+          .onUnmappableCharacter(CodingErrorAction.REPORT)));
 
       lines = new ArrayList<String>();
       for (String word=null; (word=input.readLine())!=null;) {

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/core/ResourceLoaderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/core/ResourceLoaderTest.java?rev=964430&r1=964429&r2=964430&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/core/ResourceLoaderTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/core/ResourceLoaderTest.java Thu Jul 15 13:50:48 2010
@@ -31,6 +31,7 @@ import org.apache.solr.util.plugin.SolrC
 
 import java.io.File;
 import java.io.InputStream;
+import java.nio.charset.MalformedInputException;
 import java.util.Arrays;
 import java.util.List;
 
@@ -117,4 +118,14 @@ public class ResourceLoaderTest extends 
     assertEquals(1, lines.size());
     assertEquals("BOMsAreEvil", lines.get(0));
   }
+  
+  public void testWrongEncoding() throws Exception {
+    String wrongEncoding = "stopwordsWrongEncoding.txt";
+    SolrResourceLoader loader = new SolrResourceLoader(null);
+    // ensure we get our exception
+    try {
+      List<String> lines = loader.getLines(wrongEncoding);
+      fail();
+    } catch (MalformedInputException expected) {}
+  }
 }

Added: lucene/dev/trunk/solr/src/test/test-files/solr/conf/stopwordsWrongEncoding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/test-files/solr/conf/stopwordsWrongEncoding.txt?rev=964430&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/test/test-files/solr/conf/stopwordsWrongEncoding.txt (added)
+++ lucene/dev/trunk/solr/src/test/test-files/solr/conf/stopwordsWrongEncoding.txt Thu Jul 15 13:50:48 2010
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# stopwords in the wrong encoding (ISO-8859-1).
+# tests resourceloader's ability to report wrongly encoded files.
+bañadores

Propchange: lucene/dev/trunk/solr/src/test/test-files/solr/conf/stopwordsWrongEncoding.txt
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message