camel-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From davscl...@apache.org
Subject [camel] branch master updated: CAMEL-12769: Combination of File consumer with charset and Split DSL with XPath doesn't parse XML correctly (#2505)
Date Tue, 04 Sep 2018 14:36:34 GMT
This is an automated email from the ASF dual-hosted git repository.

davsclaus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel.git


The following commit(s) were added to refs/heads/master by this push:
     new 8fa8bc9  CAMEL-12769: Combination of File consumer with charset and Split DSL with XPath doesn't parse XML correctly (#2505)
8fa8bc9 is described below

commit 8fa8bc992a2d10a2efba7428da87cd79b7e08cd8
Author: Tadayoshi Sato <sato.tadayoshi@gmail.com>
AuthorDate: Tue Sep 4 23:36:31 2018 +0900

    CAMEL-12769: Combination of File consumer with charset and Split DSL with XPath doesn't parse XML correctly (#2505)
---
 .../org/apache/camel/converter/IOConverter.java    | 87 ++++++++++++++--------
 .../apache/camel/converter/jaxp/XmlConverter.java  | 12 ++-
 .../camel/converter/IOConverterCharsetTest.java    | 18 ++---
 3 files changed, 75 insertions(+), 42 deletions(-)

diff --git a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java
index 073547e..ae02a2c 100644
--- a/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java
+++ b/camel-core/src/main/java/org/apache/camel/converter/IOConverter.java
@@ -81,40 +81,18 @@ public final class IOConverter {
         return IOHelper.buffered(new FileInputStream(file));
     }
 
+    /**
+     * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset
+     *
+     * @param file the file to be converted
+     * @param charset the charset the file is read with
+     * @return the input stream with the JVM default charset
+     */
     public static InputStream toInputStream(File file, String charset) throws IOException {
         if (charset != null) {
-            final BufferedReader reader = toReader(file, charset);
-            final Charset defaultStreamCharset = defaultCharset.get();
-            return new InputStream() {
-                private ByteBuffer bufferBytes;
-                private CharBuffer bufferedChars = CharBuffer.allocate(4096);
-
-                @Override
-                public int read() throws IOException {
-                    if (bufferBytes == null || bufferBytes.remaining() <= 0) {
-                        bufferedChars.clear();
-                        int len = reader.read(bufferedChars);
-                        bufferedChars.flip();
-                        if (len == -1) {
-                            return -1;
-                        }
-                        bufferBytes = defaultStreamCharset.encode(bufferedChars);
-                    }
-                    return bufferBytes.get();
-                }
-
-                @Override
-                public void close() throws IOException {
-                    reader.close();
-                }
-
-                @Override
-                public void reset() throws IOException {
-                    reader.reset();
-                }
-            };
+            return new EncodingInputStream(file, charset);
         } else {
-            return IOHelper.buffered(new FileInputStream(file));
+            return toInputStream(file);
         }
     }
 
@@ -501,6 +479,53 @@ public final class IOConverter {
     }
 
     /**
+     * Encoding-aware input stream.
+     */
+    public static class EncodingInputStream extends InputStream {
+
+        private final File file;
+        private final BufferedReader reader;
+        private final Charset defaultStreamCharset;
+
+        private ByteBuffer bufferBytes;
+        private CharBuffer bufferedChars = CharBuffer.allocate(4096);
+
+        public EncodingInputStream(File file, String charset) throws IOException {
+            this.file = file;
+            reader = toReader(file, charset);
+            defaultStreamCharset = defaultCharset.get();
+        }
+
+        @Override
+        public int read() throws IOException {
+            if (bufferBytes == null || bufferBytes.remaining() <= 0) {
+                bufferedChars.clear();
+                int len = reader.read(bufferedChars);
+                bufferedChars.flip();
+                if (len == -1) {
+                    return -1;
+                }
+                bufferBytes = defaultStreamCharset.encode(bufferedChars);
+            }
+            return bufferBytes.get();
+        }
+
+        @Override
+        public void close() throws IOException {
+            reader.close();
+        }
+
+        @Override
+        public void reset() throws IOException {
+            reader.reset();
+        }
+
+        public InputStream toOriginalInputStream() throws FileNotFoundException {
+            return new FileInputStream(file);
+        }
+    }
+
+    /**
      * Encoding-aware file reader. 
      */
     private static class EncodingFileReader extends InputStreamReader {
diff --git a/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java b/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java
index f8a8766..6d7c063 100644
--- a/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java
+++ b/camel-core/src/main/java/org/apache/camel/converter/jaxp/XmlConverter.java
@@ -54,7 +54,6 @@ import javax.xml.transform.stax.StAXSource;
 import javax.xml.transform.stream.StreamResult;
 import javax.xml.transform.stream.StreamSource;
 
-import org.apache.camel.util.StringHelper;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -70,8 +69,10 @@ import org.apache.camel.BytesSource;
 import org.apache.camel.Converter;
 import org.apache.camel.Exchange;
 import org.apache.camel.StringSource;
+import org.apache.camel.converter.IOConverter;
 import org.apache.camel.util.IOHelper;
 import org.apache.camel.util.ObjectHelper;
+import org.apache.camel.util.StringHelper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -870,7 +871,14 @@ public class XmlConverter {
     @Converter
     public Document toDOMDocument(InputStream in, Exchange exchange) throws IOException, SAXException, ParserConfigurationException {
         DocumentBuilder documentBuilder = createDocumentBuilder(getDocumentBuilderFactory(exchange));
-        return documentBuilder.parse(in);
+        if (in instanceof IOConverter.EncodingInputStream) {
+            // DocumentBuilder detects encoding from XML declaration, so we need to
+            // revert the converted encoding for the input stream
+            IOConverter.EncodingInputStream encIn = (IOConverter.EncodingInputStream) in;
+            return documentBuilder.parse(encIn.toOriginalInputStream());
+        } else {
+            return documentBuilder.parse(in);
+        }
     }
 
     /**
diff --git a/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
index 9d82ade..c192349 100644
--- a/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
+++ b/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
@@ -38,8 +38,8 @@ public class IOConverterCharsetTest extends ContextTestSupport {
         switchToDefaultCharset(StandardCharsets.UTF_8);
         File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
         try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
-        BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
-        BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
+             BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
+             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
             String line = reader.readLine();
             String naiveLine = naiveReader.readLine();
             assertEquals(naiveLine, line);
@@ -52,8 +52,8 @@ public class IOConverterCharsetTest extends ContextTestSupport {
         switchToDefaultCharset(StandardCharsets.ISO_8859_1);
         File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
         try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
-        BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1));
-        BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
+             BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1));
+             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
             String line = reader.readLine();
             String naiveLine = naiveReader.readLine();
             assertEquals(naiveLine, line);
@@ -66,8 +66,8 @@ public class IOConverterCharsetTest extends ContextTestSupport {
         switchToDefaultCharset(StandardCharsets.UTF_8);
         File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
         try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
-        BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
-        BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+             BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
+             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
             String line = reader.readLine();
             String naiveLine = naiveReader.readLine();
             assertEquals(naiveLine, line);
@@ -80,7 +80,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
         switchToDefaultCharset(StandardCharsets.UTF_8);
         File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
         try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
-        InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) {
+             InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) {
             byte[] bytes = new byte[8192];
             in.read(bytes);
             byte[] naiveBytes = new byte[8192];
@@ -93,7 +93,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
     public void testToReaderFileWithCharsetUTF8() throws Exception {
         File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
         try (BufferedReader reader = IOConverter.toReader(file, "UTF-8");
-        BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
+             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
             String line = reader.readLine();
             String naiveLine = naiveReader.readLine();
             assertEquals(naiveLine, line);
@@ -105,7 +105,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
     public void testToReaderFileWithCharsetLatin1() throws Exception {
         File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
         try (BufferedReader reader = IOConverter.toReader(file, "ISO-8859-1");
-        BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
             String line = reader.readLine();
             String naiveLine = naiveReader.readLine();
             assertEquals(naiveLine, line);


Mime
View raw message