james-mime4j-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nik...@apache.org
Subject svn commit: r1050333 - in /james/mime4j/branches/apache-mime4j-0.6/src: main/java/org/apache/james/mime4j/codec/DecoderUtil.java test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
Date Fri, 17 Dec 2010 10:02:32 GMT
Author: niklas
Date: Fri Dec 17 10:02:31 2010
New Revision: 1050333

URL: http://svn.apache.org/viewvc?rev=1050333&view=rev
Log:
Merged in changes to DecoderUtil and DecoderUtilTest from revision 809204 (MIME4J-138).

Modified:
    james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java   (contents, props changed)
    james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java   (contents, props changed)

Modified: james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
URL: http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java?rev=1050333&r1=1050332&r2=1050333&view=diff
==============================================================================
--- james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java (original)
+++ james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java Fri Dec 17 10:02:31 2010
@@ -1,252 +1,253 @@
-/****************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- ****************************************************************/
-
-package org.apache.james.mime4j.codec;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.james.mime4j.util.CharsetUtil;
-
-/**
- * Static methods for decoding strings, byte arrays and encoded words.
- */
-public class DecoderUtil {
-    private static Log log = LogFactory.getLog(DecoderUtil.class);
-    
-    /**
-     * Decodes a string containing quoted-printable encoded data. 
-     * 
-     * @param s the string to decode.
-     * @return the decoded bytes.
-     */
-    public static byte[] decodeBaseQuotedPrintable(String s) {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        
-        try {
-            byte[] bytes = s.getBytes("US-ASCII");
-            
-            QuotedPrintableInputStream is = new QuotedPrintableInputStream(
-                                               new ByteArrayInputStream(bytes));
-            
-            int b = 0;
-            while ((b = is.read()) != -1) {
-                baos.write(b);
-            }
-        } catch (IOException e) {
-            /*
-             * This should never happen!
-             */
-            log.error(e);
-        }
-        
-        return baos.toByteArray();
-    }
-    
-    /**
-     * Decodes a string containing base64 encoded data. 
-     * 
-     * @param s the string to decode.
-     * @return the decoded bytes.
-     */
-    public static byte[] decodeBase64(String s) {
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        
-        try {
-            byte[] bytes = s.getBytes("US-ASCII");
-            
-            Base64InputStream is = new Base64InputStream(
-                                        new ByteArrayInputStream(bytes));
-            
-            int b = 0;
-            while ((b = is.read()) != -1) {
-                baos.write(b);
-            }
-        } catch (IOException e) {
-            /*
-             * This should never happen!
-             */
-            log.error(e);
-        }
-        
-        return baos.toByteArray();
-    }
-    
-    /**
-     * Decodes an encoded word encoded with the 'B' encoding (described in 
-     * RFC 2047) found in a header field body.
-     * 
-     * @param encodedWord the encoded word to decode.
-     * @param charset the Java charset to use.
-     * @return the decoded string.
-     * @throws UnsupportedEncodingException if the given Java charset isn't 
-     *         supported.
-     */
-    public static String decodeB(String encodedWord, String charset) 
-            throws UnsupportedEncodingException {
-        
-        return new String(decodeBase64(encodedWord), charset);
-    }
-    
-    /**
-     * Decodes an encoded word encoded with the 'Q' encoding (described in 
-     * RFC 2047) found in a header field body.
-     * 
-     * @param encodedWord the encoded word to decode.
-     * @param charset the Java charset to use.
-     * @return the decoded string.
-     * @throws UnsupportedEncodingException if the given Java charset isn't 
-     *         supported.
-     */
-    public static String decodeQ(String encodedWord, String charset)
-            throws UnsupportedEncodingException {
-           
-        /*
-         * Replace _ with =20
-         */
-        StringBuilder sb = new StringBuilder(128);
-        for (int i = 0; i < encodedWord.length(); i++) {
-            char c = encodedWord.charAt(i);
-            if (c == '_') {
-                sb.append("=20");
-            } else {
-                sb.append(c);
-            }
-        }
-        
-        return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
-    }
-    
-    /**
-     * Decodes a string containing encoded words as defined by RFC 2047.
-     * Encoded words in have the form 
-     * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for 
-     * quoted-printable and 'B' or 'b' for Base64.
-     * 
-     * @param body the string to decode.
-     * @return the decoded string.
-     */
-    public static String decodeEncodedWords(String body) {
-        int previousEnd = 0;
-        boolean previousWasEncoded = false;
-
-        StringBuilder sb = new StringBuilder();
-
-        while (true) {
-            int begin = body.indexOf("=?", previousEnd);
-            int end = begin == -1 ? -1 : body.indexOf("?=", begin + 2);
-            if (end == -1) {
-                if (previousEnd == 0)
-                    return body;
-
-                sb.append(body.substring(previousEnd));
-                return sb.toString();
-            }
-            end += 2;
-
-            String sep = body.substring(previousEnd, begin);
-
-            String decoded = decodeEncodedWord(body, begin, end);
-            if (decoded == null) {
-                sb.append(sep);
-                sb.append(body.substring(begin, end));
-            } else {
-                if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
-                    sb.append(sep);
-                }
-                sb.append(decoded);
-            }
-
-            previousEnd = end;
-            previousWasEncoded = decoded != null;
-        }
-    }
-
-    // return null on error
-    private static String decodeEncodedWord(String body, int begin, int end) {
-        int qm1 = body.indexOf('?', begin + 2);
-        if (qm1 == end - 2)
-            return null;
-
-        int qm2 = body.indexOf('?', qm1 + 1);
-        if (qm2 == end - 2)
-            return null;
-
-        String mimeCharset = body.substring(begin + 2, qm1);
-        String encoding = body.substring(qm1 + 1, qm2);
-        String encodedText = body.substring(qm2 + 1, end - 2);
-
-        String charset = CharsetUtil.toJavaCharset(mimeCharset);
-        if (charset == null) {
-            if (log.isWarnEnabled()) {
-                log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
-                        + body.substring(begin, end) + "' doesn't have a "
-                        + "corresponding Java charset");
-            }
-            return null;
-        } else if (!CharsetUtil.isDecodingSupported(charset)) {
-            if (log.isWarnEnabled()) {
-                log.warn("Current JDK doesn't support decoding of charset '"
-                        + charset + "' (MIME charset '" + mimeCharset
-                        + "' in encoded word '" + body.substring(begin, end)
-                        + "')");
-            }
-            return null;
-        }
-
-        if (encodedText.length() == 0) {
-            if (log.isWarnEnabled()) {
-                log.warn("Missing encoded text in encoded word: '"
-                        + body.substring(begin, end) + "'");
-            }
-            return null;
-        }
-
-        try {
-            if (encoding.equalsIgnoreCase("Q")) {
-                return DecoderUtil.decodeQ(encodedText, charset);
-            } else if (encoding.equalsIgnoreCase("B")) {
-                return DecoderUtil.decodeB(encodedText, charset);
-            } else {
-                if (log.isWarnEnabled()) {
-                    log.warn("Warning: Unknown encoding in encoded word '"
-                            + body.substring(begin, end) + "'");
-                }
-                return null;
-            }
-        } catch (UnsupportedEncodingException e) {
-            // should not happen because of isDecodingSupported check above
-            if (log.isWarnEnabled()) {
-                log.warn("Unsupported encoding in encoded word '"
-                        + body.substring(begin, end) + "'", e);
-            }
-            return null;
-        } catch (RuntimeException e) {
-            if (log.isWarnEnabled()) {
-                log.warn("Could not decode encoded word '"
-                        + body.substring(begin, end) + "'", e);
-            }
-            return null;
-        }
-    }
-}
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mime4j.codec;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.james.mime4j.util.CharsetUtil;
+
+/**
+ * Static methods for decoding strings, byte arrays and encoded words.
+ */
+public class DecoderUtil {
+    private static Log log = LogFactory.getLog(DecoderUtil.class);
+
+    private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
+            "(.*?)=\\?([^\\?]+?)\\?(\\w)\\?([^\\?]+?)\\?=", Pattern.DOTALL);
+
+    /**
+     * Decodes a string containing quoted-printable encoded data. 
+     * 
+     * @param s the string to decode.
+     * @return the decoded bytes.
+     */
+    public static byte[] decodeQuotedPrintable(String s) {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        
+        try {
+            byte[] bytes = s.getBytes("US-ASCII");
+            
+            QuotedPrintableInputStream is = new QuotedPrintableInputStream(
+                                               new ByteArrayInputStream(bytes));
+            
+            int b = 0;
+            while ((b = is.read()) != -1) {
+                baos.write(b);
+            }
+        } catch (IOException e) {
+            // This should never happen!
+            log.error(e);
+            throw new IllegalStateException(e);
+        }
+        
+        return baos.toByteArray();
+    }
+    
+    /**
+     * Decodes a string containing base64 encoded data. 
+     * 
+     * @param s the string to decode.
+     * @return the decoded bytes.
+     */
+    public static byte[] decodeBase64(String s) {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        
+        try {
+            byte[] bytes = s.getBytes("US-ASCII");
+            
+            Base64InputStream is = new Base64InputStream(
+                                        new ByteArrayInputStream(bytes));
+            
+            int b = 0;
+            while ((b = is.read()) != -1) {
+                baos.write(b);
+            }
+        } catch (IOException e) {
+            // This should never happen!
+            log.error(e);
+            throw new IllegalStateException(e);
+        }
+        
+        return baos.toByteArray();
+    }
+    
+    /**
+     * Decodes an encoded text encoded with the 'B' encoding (described in 
+     * RFC 2047) found in a header field body.
+     * 
+     * @param encodedText the encoded text to decode.
+     * @param charset the Java charset to use.
+     * @return the decoded string.
+     * @throws UnsupportedEncodingException if the given Java charset isn't 
+     *         supported.
+     */
+    public static String decodeB(String encodedText, String charset) 
+            throws UnsupportedEncodingException {
+        byte[] decodedBytes = decodeBase64(encodedText);
+        return new String(decodedBytes, charset);
+    }
+    
+    /**
+     * Decodes an encoded text encoded with the 'Q' encoding (described in 
+     * RFC 2047) found in a header field body.
+     * 
+     * @param encodedText the encoded text to decode.
+     * @param charset the Java charset to use.
+     * @return the decoded string.
+     * @throws UnsupportedEncodingException if the given Java charset isn't 
+     *         supported.
+     */
+    public static String decodeQ(String encodedText, String charset)
+            throws UnsupportedEncodingException {
+        encodedText = replaceUnderscores(encodedText);
+        
+        byte[] decodedBytes = decodeQuotedPrintable(encodedText);
+        return new String(decodedBytes, charset);
+    }
+
+    /**
+     * Decodes a string containing encoded words as defined by RFC 2047. Encoded
+     * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+     * or 'q' for quoted-printable and 'B' or 'b' for base64.
+     * 
+     * @param body the string to decode.
+     * @return the decoded string.
+     */
+    public static String decodeEncodedWords(String body) {
+        int tailIndex = 0;
+        boolean lastMatchValid = false;
+
+        StringBuilder sb = new StringBuilder();
+
+        for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
+            String separator = matcher.group(1);
+            String mimeCharset = matcher.group(2);
+            String encoding = matcher.group(3);
+            String encodedText = matcher.group(4);
+
+            String decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText);
+            if (decoded == null) {
+                sb.append(matcher.group(0));
+            } else {
+                if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
+                    sb.append(separator);
+                }
+                sb.append(decoded);
+            }
+
+            tailIndex = matcher.end();
+            lastMatchValid = decoded != null;
+        }
+
+        if (tailIndex == 0) {
+            return body;
+        } else {
+            sb.append(body.substring(tailIndex));
+            return sb.toString();
+        }
+    }
+
+    // return null on error
+    private static String tryDecodeEncodedWord(final String mimeCharset,
+            final String encoding, final String encodedText) {
+        String charset = CharsetUtil.toJavaCharset(mimeCharset);
+        if (charset == null) {
+            if (log.isWarnEnabled()) {
+                log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
+                        + recombine(mimeCharset, encoding, encodedText) + "' doesn't have a "
+                        + "corresponding Java charset");
+            }
+            return null;
+        } else if (!CharsetUtil.isDecodingSupported(charset)) {
+            if (log.isWarnEnabled()) {
+                log.warn("Current JDK doesn't support decoding of charset '"
+                        + charset + "' (MIME charset '" + mimeCharset
+                        + "' in encoded word '" + recombine(mimeCharset, encoding, encodedText)
+                        + "')");
+            }
+            return null;
+        }
+
+        if (encodedText.length() == 0) {
+            if (log.isWarnEnabled()) {
+                log.warn("Missing encoded text in encoded word: '"
+                        + recombine(mimeCharset, encoding, encodedText) + "'");
+            }
+            return null;
+        }
+
+        try {
+            if (encoding.equalsIgnoreCase("Q")) {
+                return DecoderUtil.decodeQ(encodedText, charset);
+            } else if (encoding.equalsIgnoreCase("B")) {
+                return DecoderUtil.decodeB(encodedText, charset);
+            } else {
+                if (log.isWarnEnabled()) {
+                    log.warn("Warning: Unknown encoding in encoded word '"
+                            + recombine(mimeCharset, encoding, encodedText) + "'");
+                }
+                return null;
+            }
+        } catch (UnsupportedEncodingException e) {
+            // should not happen because of isDecodingSupported check above
+            if (log.isWarnEnabled()) {
+                log.warn("Unsupported encoding in encoded word '"
+                        + recombine(mimeCharset, encoding, encodedText) + "'", e);
+            }
+            return null;
+        } catch (RuntimeException e) {
+            if (log.isWarnEnabled()) {
+                log.warn("Could not decode encoded word '"
+                        + recombine(mimeCharset, encoding, encodedText) + "'", e);
+            }
+            return null;
+        }
+    }
+
+    private static String recombine(final String mimeCharset,
+            final String encoding, final String encodedText) {
+        return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
+    }
+
+    // Replace _ with =20
+    private static String replaceUnderscores(String str) {
+        // probably faster than String#replace(CharSequence, CharSequence)
+
+        StringBuilder sb = new StringBuilder(128);
+
+        for (int i = 0; i < str.length(); i++) {
+            char c = str.charAt(i);
+            if (c == '_') {
+                sb.append("=20");
+            } else {
+                sb.append(c);
+            }
+        }
+        
+        return sb.toString();
+    }
+}

Propchange: james/mime4j/branches/apache-mime4j-0.6/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Fri Dec 17 10:02:31 2010
@@ -0,0 +1 @@
+/james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java:809204

Modified: james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java?rev=1050333&r1=1050332&r2=1050333&view=diff
==============================================================================
--- james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java (original)
+++ james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java Fri Dec 17 10:02:31 2010
@@ -1,105 +1,127 @@
-/****************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one   *
- * or more contributor license agreements.  See the NOTICE file *
- * distributed with this work for additional information        *
- * regarding copyright ownership.  The ASF licenses this file   *
- * to you under the Apache License, Version 2.0 (the            *
- * "License"); you may not use this file except in compliance   *
- * with the License.  You may obtain a copy of the License at   *
- *                                                              *
- *   http://www.apache.org/licenses/LICENSE-2.0                 *
- *                                                              *
- * Unless required by applicable law or agreed to in writing,   *
- * software distributed under the License is distributed on an  *
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
- * KIND, either express or implied.  See the License for the    *
- * specific language governing permissions and limitations      *
- * under the License.                                           *
- ****************************************************************/
-
-package org.apache.james.mime4j.codec;
-
-import java.io.UnsupportedEncodingException;
-
-import junit.framework.TestCase;
-
-import org.apache.log4j.BasicConfigurator;
-
-public class DecoderUtilTest extends TestCase {
-
-    @Override
-    public void setUp() {
-        BasicConfigurator.resetConfiguration();
-        BasicConfigurator.configure();
-    }
-    /*
-    public void testDecodeEncodedWords() {
-        String s = "=?ISO-2022-JP?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?= "
-                 + "=?ISO-2022-JP?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?= "
-                 + "=?ISO-2022-JP?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?= "  
-                 + "=?ISO-2022-JP?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?= "
-                 + "=?ISO-2022-JP?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?=";     
-        
-        s = DecoderUtil.decodeEncodedWords(s);
-        System.out.println(s);
-    }*/
-    
-    public void testDecodeB() throws UnsupportedEncodingException {
-        String s = DecoderUtil.decodeB("VGhpcyBpcyB0aGUgcGxhaW4gd"
-                    + "GV4dCBtZXNzYWdlIQ==", "ISO8859-1");
-        assertEquals("This is the plain text message!", s);
-    }
-    
-
-    public void testDecodeQ() throws UnsupportedEncodingException {
-        String s = DecoderUtil.decodeQ("=e1_=e2=09=E3_=E4_", 
-                                                         "ISO8859-1");
-        assertEquals("\u00e1 \u00e2\t\u00e3 \u00e4 ", s);
-    }
-    
-    public void testDecodeEncodedWords() {
-        assertEquals("", DecoderUtil.decodeEncodedWords(""));
-        assertEquals("Yada yada", DecoderUtil.decodeEncodedWords("Yada yada"));
-        assertEquals("  \u00e1\u00e2\u00e3\t\u00e4", 
-                DecoderUtil.decodeEncodedWords("=?iso-8859-1?Q?_=20=e1=e2=E3=09=E4?="));
-        assertEquals("Word 1 '  \u00e2\u00e3\t\u00e4'. Word 2 '  \u00e2\u00e3\t\u00e4'", 
-                DecoderUtil.decodeEncodedWords("Word 1 '=?iso-8859-1?Q?_=20=e2=E3=09=E4?="
-                        + "'. Word 2 '=?iso-8859-1?q?_=20=e2=E3=09=E4?='"));
-        assertEquals("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?=", 
-                DecoderUtil.decodeEncodedWords("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?="));
-        assertEquals("A short text", 
-                DecoderUtil.decodeEncodedWords("=?US-ASCII?B?QSBzaG9ydCB0ZXh0?="));
-        assertEquals("A short text again!", 
-                DecoderUtil.decodeEncodedWords("=?US-ASCII?b?QSBzaG9ydCB0ZXh0IGFnYWluIQ==?="));
-
-        // invalid encoded words should be returned unchanged
-        assertEquals("=?iso8859-1?Q?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
-        assertEquals("=?iso8859-1?b?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));
-        assertEquals("=?ISO-8859-1?Q?", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?"));
-        assertEquals("=?ISO-8859-1?R?abc?=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?R?abc?="));
-
-        // encoded-text requires at least one character according to rfc 2047
-        assertEquals("=?ISO-8859-1?Q??=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q??="));
-        assertEquals("=?ISO-8859-1?B??=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?B??="));
-        
-        // white space between encoded words should be removed (MIME4J-104)
-        assertEquals("a", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?="));
-        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b"));
-        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="));
-        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?="));
-        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\r\n  =?ISO-8859-1?Q?b?="));
-        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a_b?="));
-        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="));
-
-        // non white space between encoded words should be retained
-        assertEquals("a b c", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?="));
-
-        // text before and after encoded words should be retained
-        assertEquals(" a b c ", DecoderUtil.decodeEncodedWords(" =?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= "));
-        assertEquals("! a b c !", DecoderUtil.decodeEncodedWords("! =?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= !"));
-        
-        // Bug detected on June 7, 2005. Decoding the following string caused
-        // OutOfMemoryError.
-        assertEquals("=3?!!\\=?\"!g6P\"!Xp:\"!", DecoderUtil.decodeEncodedWords("=3?!!\\=?\"!g6P\"!Xp:\"!"));
-    }    
-}
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mime4j.codec;
+
+import java.io.UnsupportedEncodingException;
+
+import junit.framework.TestCase;
+
+import org.apache.log4j.BasicConfigurator;
+
+public class DecoderUtilTest extends TestCase {
+
+    @Override
+    public void setUp() {
+        BasicConfigurator.resetConfiguration();
+        BasicConfigurator.configure();
+    }
+
+    public void testDecodeB() throws UnsupportedEncodingException {
+        String s = DecoderUtil.decodeB("VGhpcyBpcyB0aGUgcGxhaW4gd"
+                    + "GV4dCBtZXNzYWdlIQ==", "ISO8859-1");
+        assertEquals("This is the plain text message!", s);
+    }
+
+    public void testDecodeQ() throws UnsupportedEncodingException {
+        String s = DecoderUtil.decodeQ("=e1_=e2=09=E3_=E4_", "ISO8859-1");
+        assertEquals("\u00e1 \u00e2\t\u00e3 \u00e4 ", s);
+    }
+
+    public void testNonEncodedWordsAreIgnored() {
+        assertEquals("", DecoderUtil.decodeEncodedWords(""));
+        assertEquals("Yada yada", DecoderUtil.decodeEncodedWords("Yada yada"));
+    }
+
+    public void testDecodeSomeEncodedWords() {
+        assertEquals("  \u00e1\u00e2\u00e3\t\u00e4", 
+                DecoderUtil.decodeEncodedWords("=?iso-8859-1?Q?_=20=e1=e2=E3=09=E4?="));
+        assertEquals("Word 1 '  \u00e2\u00e3\t\u00e4'. Word 2 '  \u00e2\u00e3\t\u00e4'", 
+                DecoderUtil.decodeEncodedWords("Word 1 '=?iso-8859-1?Q?_=20=e2=E3=09=E4?="
+                        + "'. Word 2 '=?iso-8859-1?q?_=20=e2=E3=09=E4?='"));
+        assertEquals("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?=", 
+                DecoderUtil.decodeEncodedWords("=?iso-8859-YADA?Q?_=20=t1=e2=E3=09=E4?="));
+        assertEquals("A short text", 
+                DecoderUtil.decodeEncodedWords("=?US-ASCII?B?QSBzaG9ydCB0ZXh0?="));
+        assertEquals("A short text again!", 
+                DecoderUtil.decodeEncodedWords("=?US-ASCII?b?QSBzaG9ydCB0ZXh0IGFnYWluIQ==?="));
+    }
+
+    public void testDecodeJapaneseEncodedWords() {
+        String enc = "=?ISO-2022-JP?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?= "
+                 + "=?ISO-2022-JP?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?= "
+                 + "=?ISO-2022-JP?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?= "  
+                 + "=?ISO-2022-JP?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?= "
+                 + "=?ISO-2022-JP?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?=";     
+
+        String dec = DecoderUtil.decodeEncodedWords(enc);
+        assertEquals("\u672A\u627F\u8AFE\u5E83\u544A\u203B\u30B5\u30A4\u30C9\u30D3"
+                + "\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
+    }
+
+    public void testInvalidEncodedWordsAreIgnored() {
+        assertEquals("=?iso8859-1?Q?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
+        assertEquals("=?iso8859-1?b?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));
+        assertEquals("=?ISO-8859-1?Q?", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?"));
+        assertEquals("=?ISO-8859-1?R?abc?=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?R?abc?="));
+        assertEquals("test =?ISO-8859-1?R?abc?=", DecoderUtil.decodeEncodedWords("test =?ISO-8859-1?R?abc?="));
+    }
+
+    public void testEmptyEncodedTextIsIgnored() {
+        // encoded-text requires at least one character according to rfc 2047
+        assertEquals("=?ISO-8859-1?Q??=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q??="));
+        assertEquals("=?ISO-8859-1?B??=", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?B??="));
+    }
+
+    // see MIME4J-104
+    public void testWhiteSpaceBetweenEncodedWordsGetsRemoved() {
+        assertEquals("a", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?="));
+        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b"));
+        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="));
+        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?="));
+        assertEquals("ab", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\r\n  =?ISO-8859-1?Q?b?="));
+        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a_b?="));
+        assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="));
+    }
+
+    // see MIME4J-138
+    public void testEncodedTextMayStartWithAnEqualsSign() {
+        assertEquals(" foo", DecoderUtil.decodeEncodedWords("=?utf-8?Q?=20foo?="));
+        assertEquals("Re: How to place a view at the bottom with a 100% width",
+            DecoderUtil.decodeEncodedWords("=?utf-8?Q?Re:=20How=20to=20place=20a=20view=20at=20the=20bottom?= "
+                    + "=?utf-8?Q?=20with=20a=20100%=20width?="));
+        assertEquals("Test \u00fc and more",
+            DecoderUtil.decodeEncodedWords("Test =?ISO-8859-1?Q?=FC_?= =?ISO-8859-1?Q?and_more?="));
+    }
+
+    public void testNonWhiteSpaceBetweenEncodedWordsIsRetained() {
+        assertEquals("a b c", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?="));
+        assertEquals("a\rb\nc", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\rb\n=?ISO-8859-1?Q?c?="));
+    }
+
+    public void testTextBeforeAndAfterEncodedWordIsRetained() {
+        assertEquals(" a b c ", DecoderUtil.decodeEncodedWords(" =?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= "));
+        assertEquals("! a b c !", DecoderUtil.decodeEncodedWords("! =?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?= !"));
+    }
+
+    public void testFunnyInputDoesNotRaiseOutOfMemoryError() {
+        // Bug detected on June 7, 2005. Decoding the following string caused OutOfMemoryError.
+        assertEquals("=3?!!\\=?\"!g6P\"!Xp:\"!", DecoderUtil.decodeEncodedWords("=3?!!\\=?\"!g6P\"!Xp:\"!"));
+    }    
+}

Propchange: james/mime4j/branches/apache-mime4j-0.6/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Fri Dec 17 10:02:31 2010
@@ -0,0 +1 @@
+/james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java:809204



Mime
View raw message