jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r690282 - /jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java
Date Fri, 29 Aug 2008 15:01:07 GMT
Author: mreutegg
Date: Fri Aug 29 08:01:07 2008
New Revision: 690282

URL: http://svn.apache.org/viewvc?rev=690282&view=rev
Log:
JCR-1727: HTMLTextExtractor modifying UTF-8 encoded String

Modified:
    jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java

Modified: jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java?rev=690282&r1=690281&r2=690282&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java	(original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/HTMLTextExtractor.java	Fri Aug 29 08:01:07 2008
@@ -31,6 +31,7 @@
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.StringReader;
+import java.io.InputStreamReader;
 
 /**
  * Text extractor for HyperText Markup Language (HTML).
@@ -64,7 +65,13 @@
             HTMLParser parser = new HTMLParser();
             SAXResult result = new SAXResult(new DefaultHandler());
 
-            SAXSource source = new SAXSource(parser, new InputSource(stream));
+            Reader reader;
+            if (encoding != null) {
+                reader = new InputStreamReader(stream, encoding);
+            } else {
+                reader = new InputStreamReader(stream);
+            }
+            SAXSource source = new SAXSource(parser, new InputSource(reader));
             transformer.transform(source, result);
 
             return new StringReader(parser.getContents());



Mime
View raw message