geronimo-scm mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bsny...@apache.org
Subject svn commit: r381393 [3/5] - in /geronimo/specs/trunk/geronimo-spec-javamail/src: main/java/javax/mail/internet/ main/java/org/apache/geronimo/mail/util/ main/resources/META-INF/ test/java/javax/mail/internet/
Date Mon, 27 Feb 2006 17:38:09 GMT
Modified: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/MimeUtility.java
URL: http://svn.apache.org/viewcvs/geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/MimeUtility.java?rev=381393&r1=381392&r2=381393&view=diff
==============================================================================
--- geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/MimeUtility.java (original)
+++ geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/MimeUtility.java Mon Feb 27 09:38:03 2006
@@ -17,13 +17,37 @@
 
 package javax.mail.internet;
 
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.StringTokenizer;
+
 import javax.activation.DataHandler;
 import javax.activation.DataSource;
 import javax.mail.MessagingException;
 
+import org.apache.geronimo.mail.util.ASCIIUtil;
+import org.apache.geronimo.mail.util.Base64;
+import org.apache.geronimo.mail.util.Base64DecoderStream;
+import org.apache.geronimo.mail.util.Base64Encoder;
+import org.apache.geronimo.mail.util.Base64EncoderStream;
+import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
+import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
+import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
+import org.apache.geronimo.mail.util.QuotedPrintable;
+import org.apache.geronimo.mail.util.SessionUtil;
+import org.apache.geronimo.mail.util.UUDecoderStream;
+import org.apache.geronimo.mail.util.UUEncoderStream;
+
 // encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
 // In addition, "uuencode" is also supported. The
 
@@ -32,6 +56,9 @@
  */
 public class MimeUtility {
 
+    private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
+    private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
+
     private MimeUtility() {
     }
 
@@ -39,62 +66,614 @@
 
     private static String defaultJavaCharset;
     private static String escapedChars = "\"\\\r\n";
+    private static String linearWhiteSpace = " \t\r\n";
+
+    private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
+    private static String QP_TEXT_SPECIALS = "=_?";
+
+    // the javamail spec includes the ability to map java encoding names to MIME-specified names.  Normally,
+    // these values are loaded from a character mapping file.
+    private static Map java2mime;
+    private static Map mime2java;
+
+    static {
+        // we need to load the mapping tables used by javaCharset() and mimeCharset().
+        loadCharacterSetMappings();
+    }
 
     public static InputStream decode(InputStream in, String encoding) throws MessagingException {
-        // TODO - take account of encoding
-        return in;
+        encoding = encoding.toLowerCase();
+
+        // some encodings are just pass-throughs, with no real decoding.
+        if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
+            return in;
+        }
+        else if (encoding.equals("base64")) {
+            return new Base64DecoderStream(in);
+        }
+        // UUEncode is known by a couple historical extension names too.
+        else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
+            return new UUDecoderStream(in);
+        }
+        else if (encoding.equals("quoted-printable")) {
+            return new QuotedPrintableDecoderStream(in);
+        }
+        else {
+            throw new MessagingException("Unknown encoding " + encoding);
+        }
     }
 
-    public static String decodeText(String word) throws UnsupportedEncodingException {
-        // TODO - take account of encoding
-        return word;
+    /**
+     * Decode a string of text obtained from a mail header into
+     * its proper form.  The text generally will consist of a
+     * string of tokens, some of which may be RFC 2047 encoded-words
+     * (base64 or quoted-printable).
+     *
+     * @param text   The text to decode.
+     *
+     * @return The decoded text string.
+     * @exception UnsupportedEncodingException
+     */
+    public static String decodeText(String text) throws UnsupportedEncodingException {
+        // if the text contains any encoded tokens, those tokens will be marked with "=?".  If the
+        // source string doesn't contain that sequence, no decoding is required.
+        if (text.indexOf("=?") < 0) {
+            return text;
+        }
+
+        // we have two sets of rules we can apply.
+        if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
+            return decodeTextNonStrict(text);
+        }
+
+        int offset = 0;
+        int endOffset = text.length();
+
+        int startWhiteSpace = -1;
+        int endWhiteSpace = -1;
+
+        StringBuffer decodedText = new StringBuffer(text.length());
+
+        boolean previousTokenEncoded = false;
+
+        while (offset < endOffset) {
+            char ch = text.charAt(offset);
+
+            // is this a whitespace character?
+            if (linearWhiteSpace.indexOf(ch) != -1) {
+                startWhiteSpace = offset;
+                while (offset < endOffset) {
+                    // step over the white space characters.
+                    ch = text.charAt(offset);
+                    if (linearWhiteSpace.indexOf(ch) != -1) {
+                        offset++;
+                    }
+                    else {
+                        // record the location of the first non lwsp and drop down to process the
+                        // token characters.
+                        endWhiteSpace = offset;
+                        break;
+                    }
+                }
+            }
+            else {
+                // we have a word token.  We need to scan over the word and then try to parse it.
+                int wordStart = offset;
+
+                while (offset < endOffset) {
+                    // step over the non-white space characters that make up the word.
+                    ch = text.charAt(offset);
+                    if (linearWhiteSpace.indexOf(ch) == -1) {
+                        offset++;
+                    }
+                    else {
+                        break;
+                    }
+
+                    //NB:  Trailing whitespace on these header strings will just be discarded.
+                }
+                // pull out the word token.
+                String word = text.substring(wordStart, offset);
+                // is the token encoded?  decode the word
+                if (word.startsWith("=?")) {
+                    try {
+                        // if this gives a parsing failure, treat it like a non-encoded word.
+                        String decodedWord = decodeWord(word);
+
+                        // are any whitespace characters significant?  Append 'em if we've got 'em.
+                        if (!previousTokenEncoded) {
+                            if (startWhiteSpace != -1) {
+                                decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
+                                startWhiteSpace = -1;
+                            }
+                        }
+                        // this is definitely a decoded token.
+                        previousTokenEncoded = true;
+                        // and add this to the text.
+                        decodedText.append(decodedWord);
+                        // we continue parsing from here...we allow parsing errors to fall through
+                        // and get handled as normal text.
+                        continue;
+
+                    } catch (ParseException e) {
+                    }
+                }
+                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
+                // if we have it.
+                if (startWhiteSpace != -1) {
+                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
+                    startWhiteSpace = -1;
+                }
+                // this is not a decoded token.
+                previousTokenEncoded = false;
+                decodedText.append(word);
+            }
+        }
+
+        return decodedText.toString();
+    }
+
+
+    /**
+     * Decode a string of text obtained from a mail header into
+     * its proper form.  The text generally will consist of a
+     * string of tokens, some of which may be RFC 2047 encoded-words
+     * (base64 or quoted-printable).  This is the non-strict decoder for mailers that
+     * violate the RFC 2047 restriction that encoded tokens must be delimited
+     * by linear white space.  This will scan tokens looking for inner tokens
+     * enclosed in "=?" -- "?=" pairs.
+     *
+     * @param text   The text to decode.
+     *
+     * @return The decoded text string.
+     * @exception UnsupportedEncodingException
+     */
+    private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
+        int offset = 0;
+        int endOffset = text.length();
+
+        int startWhiteSpace = -1;
+        int endWhiteSpace = -1;
+
+        StringBuffer decodedText = new StringBuffer(text.length());
+
+        boolean previousTokenEncoded = false;
+
+        while (offset < endOffset) {
+            char ch = text.charAt(offset);
+
+            // is this a whitespace character?
+            if (linearWhiteSpace.indexOf(ch) != -1) {
+                startWhiteSpace = offset;
+                while (offset < endOffset) {
+                    // step over the white space characters.
+                    ch = text.charAt(offset);
+                    if (linearWhiteSpace.indexOf(ch) != -1) {
+                        offset++;
+                    }
+                    else {
+                        // record the location of the first non lwsp and drop down to process the
+                        // token characters.
+                        endWhiteSpace = offset;
+                        break;
+                    }
+                }
+            }
+            else {
+                // we're at the start of a word token.  We potentially need to break this up into subtokens
+                int wordStart = offset;
+
+                while (offset < endOffset) {
+                    // step over the non-white space characters that make up the word.
+                    ch = text.charAt(offset);
+                    if (linearWhiteSpace.indexOf(ch) == -1) {
+                        offset++;
+                    }
+                    else {
+                        break;
+                    }
+
+                    //NB:  Trailing whitespace on these header strings will just be discarded.
+                }
+                // pull out the word token.
+                String word = text.substring(wordStart, offset);
+
+                int decodeStart = 0;
+
+                // now scan and process each of the bits within here.
+                while (decodeStart < word.length()) {
+                    int tokenStart = word.indexOf("=?", decodeStart);
+                    if (tokenStart == -1) {
+                        // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
+                        // if we have it.
+                        if (startWhiteSpace != -1) {
+                            decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
+                            startWhiteSpace = -1;
+                        }
+                        // this is not a decoded token.
+                        previousTokenEncoded = false;
+                        decodedText.append(word.substring(decodeStart));
+                        // we're finished.
+                        break;
+                    }
+                    // we have something to process
+                    else {
+                        // we might have a normal token preceding this.
+                        if (tokenStart != decodeStart) {
+                            // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
+                            // if we have it.
+                            if (startWhiteSpace != -1) {
+                                decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
+                                startWhiteSpace = -1;
+                            }
+                            // this is not a decoded token.
+                            previousTokenEncoded = false;
+                            decodedText.append(word.substring(decodeStart, tokenStart));
+                        }
+
+                        // now find the end marker.
+                        int tokenEnd = word.indexOf("?=", tokenStart);
+                        // sigh, an invalid token.  Treat this as plain text.
+                        if (tokenEnd == -1) {
+                            // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
+                            // if we have it.
+                            if (startWhiteSpace != -1) {
+                                decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
+                                startWhiteSpace = -1;
+                            }
+                            // this is not a decoded token.
+                            previousTokenEncoded = false;
+                            decodedText.append(word.substring(tokenStart));
+                            // we're finished.
+                            break;
+                        }
+                        else {
+                            // update our ticker
+                            decodeStart = tokenEnd + 2;
+
+                            String token = word.substring(tokenStart, tokenEnd);
+                            try {
+                                // if this gives a parsing failure, treat it like a non-encoded word.
+                                String decodedWord = decodeWord(token);
+
+                                // are any whitespace characters significant?  Append 'em if we've got 'em.
+                                if (!previousTokenEncoded) {
+                                    if (startWhiteSpace != -1) {
+                                        decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
+                                        startWhiteSpace = -1;
+                                    }
+                                }
+                                // this is definitely a decoded token.
+                                previousTokenEncoded = true;
+                                // and add this to the text.
+                                decodedText.append(decodedWord);
+                                // we continue parsing from here...we allow parsing errors to fall through
+                                // and get handled as normal text.
+                                continue;
+
+                            } catch (ParseException e) {
+                            }
+                            // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
+                            // if we have it.
+                            if (startWhiteSpace != -1) {
+                                decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
+                                startWhiteSpace = -1;
+                            }
+                            // this is not a decoded token.
+                            previousTokenEncoded = false;
+                            decodedText.append(token);
+                        }
+                    }
+                }
+            }
+        }
+
+        return decodedText.toString();
     }
 
+    /**
+     * Parse a string using the RFC 2047 rules for an "encoded-word"
+     * type.  This encoding has the syntax:
+     *
+     * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+     *
+     * @param word   The possibly encoded word value.
+     *
+     * @return The decoded word.
+     * @exception ParseException
+     * @exception UnsupportedEncodingException
+     */
     public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
-        // TODO - take account of encoding
-        return word;
+        // encoded words start with the characters "=?".  If this is not an encoded word, we throw a
+        // ParseException for the caller.
+
+        if (!word.startsWith("=?")) {
+            throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
+        }
+
+        int charsetPos = word.indexOf('?', 2);
+        if (charsetPos == -1) {
+            throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
+        }
+
+        // pull out the character set information (this is the MIME name at this point).
+        String charset = word.substring(2, charsetPos).toLowerCase();
+
+        // now pull out the encoding token the same way.
+        int encodingPos = word.indexOf('?', charsetPos + 1);
+        if (encodingPos == -1) {
+            throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
+        }
+
+        String encoding = word.substring(charsetPos + 1, encodingPos);
+
+        // and finally the encoded text.
+        int encodedTextPos = word.indexOf("?=", encodingPos + 1);
+        if (encodedTextPos == -1) {
+            throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
+        }
+
+        String encodedText = word.substring(encodingPos + 1, encodedTextPos);
+
+        // seems a bit silly to encode a null string, but easy to deal with.
+        if (encodedText.length() == 0) {
+            return "";
+        }
+
+        try {
+            // the decoder writes directly to an output stream.
+            ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
+
+            byte[] encodedData = encodedText.getBytes("US-ASCII");
+
+            // Base64 encoded?
+            if (encoding.equals("B")) {
+                Base64.decode(encodedData, out);
+            }
+            // maybe quoted printable.
+            else if (encoding.equals("Q")) {
+                QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
+                dataEncoder.decodeWord(encodedData, out);
+            }
+            else {
+                throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
+            }
+            // get the decoded byte data and convert into a string.
+            byte[] decodedData = out.toByteArray();
+            return new String(decodedData, javaCharset(charset));
+        } catch (IOException e) {
+            throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
+        }
+
     }
 
+    /**
+     * Wrap an encoder around a given output stream.
+     *
+     * @param out      The output stream to wrap.
+     * @param encoding The name of the encoding.
+     *
+     * @return An instance of FilterOutputStream that manages on the fly
+     *         encoding for the requested encoding type.
+     * @exception MessagingException
+     */
     public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
-        // TODO - take account of encoding
-        return out;
+        // no encoding specified, so assume it goes out unchanged.
+        if (encoding == null) {
+            return out;
+        }
+
+        encoding = encoding.toLowerCase();
+
+        // some encodings are just pass-throughs, with no real encoding.
+        if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
+            return out;
+        }
+        else if (encoding.equals("base64")) {
+            return new Base64EncoderStream(out);
+        }
+        // UUEncode is known by a couple historical extension names too.
+        else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
+            return new UUEncoderStream(out);
+        }
+        else if (encoding.equals("quoted-printable")) {
+            return new QuotedPrintableEncoderStream(out);
+        }
+        else {
+            throw new MessagingException("Unknown encoding " + encoding);
+        }
     }
 
+    /**
+     * Wrap an encoder around a given output stream.
+     *
+     * @param out      The output stream to wrap.
+     * @param encoding The name of the encoding.
+     * @param filename The filename of the data being sent (only used for UUEncode).
+     *
+     * @return An instance of FilterOutputStream that manages on the fly
+     *         encoding for the requested encoding type.
+     * @exception MessagingException
+     */
     public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
-        // TODO - take account of encoding
-        return out;
+        encoding = encoding.toLowerCase();
+
+        // some encodings are just pass-throughs, with no real encoding.
+        if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
+            return out;
+        }
+        else if (encoding.equals("base64")) {
+            return new Base64EncoderStream(out);
+        }
+        // UUEncode is known by a couple historical extension names too.
+        else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
+            return new UUEncoderStream(out, filename);
+        }
+        else if (encoding.equals("quoted-printable")) {
+             return new QuotedPrintableEncoderStream(out);
+        }
+        else {
+            throw new MessagingException("Unknown encoding " + encoding);
+        }
     }
 
+
     public static String encodeText(String word) throws UnsupportedEncodingException {
-        // TODO - take account of encoding
-        return word;
+        return encodeText(word, null, null);
     }
 
-    public static String encodeText(String word, String characterset, String encoding) throws UnsupportedEncodingException {
-        // TODO - take account of encoding
-        return word;
+    public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
+        return encodeWord(word, charset, encoding, false);
     }
 
     public static String encodeWord(String word) throws UnsupportedEncodingException {
-        // TODO - take account of encoding
-        return word;
+        return encodeWord(word, null, null);
     }
 
-    public static String encodeWord(String word, String characteset, String encoding) throws UnsupportedEncodingException {
-        // TODO - take account of encoding
-        return word;
+    public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
+        return encodeWord(word, charset, encoding, true);
+    }
+
+
+    private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
+
+        // figure out what we need to encode this.
+        String encoder = ASCIIUtil.getTextTransferEncoding(word);
+        // all ascii?  We can return this directly,
+        if (encoder.equals("7bit")) {
+            return word;
+        }
+
+        // if not given a charset, use the default.
+        if (charset == null) {
+            charset = getDefaultMIMECharset();
+        }
+
+        // sort out the encoder.  If not explicitly given, use the best guess we've already established.
+        if (encoding != null) {
+            if (encoding.equalsIgnoreCase("B")) {
+                encoder = "base64";
+            }
+            else if (encoding.equalsIgnoreCase("G")) {
+                encoder = "quoted-printable";
+            }
+            else {
+                throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
+            }
+        }
+
+        try {
+            // get the string bytes in the correct source charset
+            InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+            if (encoder.equals("base64")) {
+                Base64Encoder dataEncoder = new Base64Encoder();
+                dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
+            }
+            else {
+                QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
+                dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
+            }
+
+            byte[] bytes = out.toByteArray();
+            return new String(bytes);
+        } catch (IOException e) {
+            throw new UnsupportedEncodingException("Invalid encoding");
+        }
     }
 
+
+    /**
+     * Examine the content of a data source and decide what type
+     * of transfer encoding should be used.  For text streams,
+     * we'll decide between 7bit, quoted-printable, and base64.
+     * For binary content types, we'll use either 7bit or base64.
+     *
+     * @param handler The DataHandler associated with the content.
+     *
+     * @return The string name of an encoding used to transfer the content.
+     */
     public static String getEncoding(DataHandler handler) {
-        // TODO figure what type of data it is
-        return "binary";
+
+
+        // if this handler has an associated data source, we can read directly from the
+        // data source to make this judgment.  This is generally MUCH faster than asking the
+        // DataHandler to write out the data for us.
+        DataSource ds = handler.getDataSource();
+        if (ds != null) {
+            return getEncoding(ds);
+        }
+
+        try {
+            // get a parser that allows us to make comparisons.
+            ContentType content = new ContentType(ds.getContentType());
+
+            // The only access to the content bytes at this point is by asking the handler to write
+            // the information out to a stream.  We're going to pipe this through a special stream
+            // that examines the bytes as they go by.
+            ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
+
+            handler.writeTo(checker);
+
+            // figure this out based on whether we believe this to be a text type or not.
+            if (content.match("text/*")) {
+                return checker.getTextTransferEncoding();
+            }
+            else {
+                return checker.getBinaryTransferEncoding();
+            }
+
+        } catch (Exception e) {
+            // any unexpected I/O exceptions we'll force to a "safe" fallback position.
+            return "base64";
+        }
     }
 
+
+    /**
+     * Determine what transfer encoding should be used for
+     * data retrieved from a DataSource.
+     *
+     * @param source The DataSource for the transmitted data.
+     *
+     * @return The string name of the encoding form that should be used for
+     *         the data.
+     */
     public static String getEncoding(DataSource source) {
-        // TODO figure what type of data it is
-        return "binary";
+        InputStream in = null;
+
+        try {
+            // get a parser that allows us to make comparisons.
+            ContentType content = new ContentType(source.getContentType());
+
+            // we're probably going to have to scan the data.
+            in = source.getInputStream();
+
+            if (!content.match("text/*")) {
+                // Not purporting to be a text type?  Examine the content to see if we might be able to
+                // at least pretend it is an ascii type.
+                return ASCIIUtil.getBinaryTransferEncoding(in);
+            }
+            else {
+                return ASCIIUtil.getTextTransferEncoding(in);
+            }
+        } catch (Exception e) {
+            // this was a problem...not sure what makes sense here, so we'll assume it's binary
+            // and we need to transfer this using Base64 encoding.
+            return "base64";
+        } finally {
+            // make sure we close the stream
+            try {
+                if (in != null) {
+                    in.close();
+                }
+            } catch (IOException e) {
+            }
+        }
     }
 
+
     /**
      * Quote a "word" value.  If the word contains any character from
      * the specified "specials" list, this value is returned as a
@@ -156,29 +735,317 @@
         return buffer.toString();
     }
 
+    /**
+     * Translate a MIME standard character set name into the Java
+     * equivalent.
+     *
+     * @param charset The MIME standard name.
+     *
+     * @return The Java equivalent for this name.
+     */
     public static String javaCharset(String charset) {
-        // TODO Perform translations as appropriate
-        return charset;
+        // nothing in, nothing out.
+        if (charset == null) {
+            return null;
+        }
+
+        String mappedCharset = (String)mime2java.get(charset.toLowerCase());
+        // if there is no mapping, then the original name is used.  Many of the MIME character set
+        // names map directly back into Java.  The reverse isn't necessarily true.
+        return mappedCharset == null ? charset : mappedCharset;
     }
 
+    /**
+     * Map a Java character set name into the MIME equivalent.
+     *
+     * @param charset The java character set name.
+     *
+     * @return The MIME standard equivalent for this character set name.
+     */
     public static String mimeCharset(String charset) {
-        // TODO Perform translations as appropriate
-        return charset;
+        // nothing in, nothing out.
+        if (charset == null) {
+            return null;
+        }
+
+        String mappedCharset = (String)java2mime.get(charset.toLowerCase());
+        // if there is no mapping, then the original name is used.  Many of the MIME character set
+        // names map directly back into Java.  The reverse isn't necessarily true.
+        return mappedCharset == null ? charset : mappedCharset;
     }
 
+
+    /**
+     * Get the default character set to use, in Java name format.
+     * This will either be the value set with the mail.mime.charset
+     * system property or obtained from the file.encoding system
+     * property.  If neither of these is set, we fall back to
+     * 8859_1 (ISO Latin-1, a superset of US-ASCII).
+     *
+     * @return The character string value of the default character set.
+     */
     public static String getDefaultJavaCharset() {
+        String charset = SessionUtil.getProperty("mail.mime.charset");
+        if (charset != null) {
+            return javaCharset(charset);
+        }
+        return SessionUtil.getProperty("file.encoding", "8859_1");
+    }
+
+    /**
+     * Get the default character set to use, in MIME name format.
+     * This will either be the value set with the mail.mime.charset
+     * system property or obtained from the file.encoding system
+     * property.  If neither of these is set, we fall back to the
+     * MIME equivalent of 8859_1 (ISO Latin-1, a superset of US-ASCII).
+     *
+     * @return The character string value of the default character set.
+     */
+    public static String getDefaultMIMECharset() {
+        // if the property is specified, this can be used directly.
+        String charset = SessionUtil.getProperty("mail.mime.charset");
+        if (charset != null) {
+            return charset;
+        }
+
+        // get the Java-defined default and map back to a MIME name.
+        return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
+    }
+
+
+    /**
+     * Load the default mapping tables used by the javaCharset()
+     * and mimeCharset() methods.  By default, these tables are
+     * loaded from the /META-INF/javamail.charset.map file.  If
+     * something goes wrong loading that file, we configure things
+     * with a default mapping table (which just happens to mimic
+     * what's in the default mapping file).
+     */
+    static private void loadCharacterSetMappings() {
+        java2mime = new HashMap();
+        mime2java = new HashMap();
+
+
+        // normally, these come from a character map file contained in the jar file.
         try {
-            String charset = System.getProperty("mail.mime.charset");
-            if (charset != null) {
-                return javaCharset(charset);
-            }
-            charset = System.getProperty("file.encoding");
-            if (charset != null) {
-                return charset;
+            InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
+
+            if (map != null) {
+                // get a reader for this so we can load.
+                BufferedReader reader = new BufferedReader(new InputStreamReader(map));
+
+                readMappings(reader, java2mime);
+                readMappings(reader, mime2java);
+            }
+        } catch (Exception e) {
+        }
+
+        // if any sort of error occurred reading the preferred file version, we could end up with empty
+        // mapping tables.  This could cause all sorts of difficulty, so ensure they are populated with at
+        // least a reasonable set of defaults.
+
+        // these mappings echo what's in the default file.
+        if (java2mime.isEmpty()) {
+            java2mime.put("8859_1", "ISO-8859-1");
+            java2mime.put("iso8859_1", "ISO-8859-1");
+            java2mime.put("iso8859-1", "ISO-8859-1");
+
+            java2mime.put("8859_2", "ISO-8859-2");
+            java2mime.put("iso8859_2", "ISO-8859-2");
+            java2mime.put("iso8859-2", "ISO-8859-2");
+
+            java2mime.put("8859_3", "ISO-8859-3");
+            java2mime.put("iso8859_3", "ISO-8859-3");
+            java2mime.put("iso8859-3", "ISO-8859-3");
+
+            java2mime.put("8859_4", "ISO-8859-4");
+            java2mime.put("iso8859_4", "ISO-8859-4");
+            java2mime.put("iso8859-4", "ISO-8859-4");
+
+            java2mime.put("8859_5", "ISO-8859-5");
+            java2mime.put("iso8859_5", "ISO-8859-5");
+            java2mime.put("iso8859-5", "ISO-8859-5");
+
+            java2mime.put ("8859_6", "ISO-8859-6");
+            java2mime.put("iso8859_6", "ISO-8859-6");
+            java2mime.put("iso8859-6", "ISO-8859-6");
+
+            java2mime.put("8859_7", "ISO-8859-7");
+            java2mime.put("iso8859_7", "ISO-8859-7");
+            java2mime.put("iso8859-7", "ISO-8859-7");
+
+            java2mime.put("8859_8", "ISO-8859-8");
+            java2mime.put("iso8859_8", "ISO-8859-8");
+            java2mime.put("iso8859-8", "ISO-8859-8");
+
+            java2mime.put("8859_9", "ISO-8859-9");
+            java2mime.put("iso8859_9", "ISO-8859-9");
+            java2mime.put("iso8859-9", "ISO-8859-9");
+
+            java2mime.put("sjis", "Shift_JIS");
+            java2mime.put ("jis", "ISO-2022-JP");
+            java2mime.put("iso2022jp", "ISO-2022-JP");
+            java2mime.put("euc_jp", "euc-jp");
+            java2mime.put("koi8_r", "koi8-r");
+            java2mime.put("euc_cn", "euc-cn");
+            java2mime.put("euc_tw", "euc-tw");
+            java2mime.put("euc_kr", "euc-kr");
+        }
+
+        if (mime2java.isEmpty ()) {
+            mime2java.put("iso-2022-cn", "ISO2022CN");
+            mime2java.put("iso-2022-kr", "ISO2022KR");
+            mime2java.put("utf-8", "UTF8");
+            mime2java.put("utf8", "UTF8");
+            mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
+            mime2java.put("ja_jp.eucjp", "EUCJIS");
+            mime2java.put ("euc-kr", "KSC5601");
+            mime2java.put("euckr", "KSC5601");
+            mime2java.put("us-ascii", "ISO-8859-1");
+            mime2java.put("x-us-ascii", "ISO-8859-1");
+        }
+    }
+
+
+    /**
+     * Read a section of a character map table and populate the
+     * target mapping table with the information.  The table end
+     * is marked by a line starting with "--" and also ending with
+     * "--".  Blank lines and comment lines (beginning with '#') are
+     * ignored.
+     *
+     * @param reader The source of the file information.
+     * @param table  The mapping table used to store the information.
+     */
+    static private void readMappings(BufferedReader reader, Map table) throws IOException {
+        // process lines to the EOF or the end of table marker.
+        while (true) {
+            String line = reader.readLine();
+            // no line returned is an EOF
+            if (line == null) {
+                return;
+            }
+
+            // trim so we're not messed up by trailing blanks
+            line = line.trim();
+
+            if (line.length() == 0 || line.startsWith("#")) {
+                continue;
+            }
+
+            // stop processing if this is the end-of-table marker.
+            if (line.startsWith("--") && line.endsWith("--")) {
+                return;
+            }
+
+            // we allow either blanks or tabs as token delimiters.
+            StringTokenizer tokenizer = new StringTokenizer(line, " \t");
+
+            try {
+                String from = tokenizer.nextToken().toLowerCase();
+                String to = tokenizer.nextToken();
+
+                table.put(from, to);
+            } catch (NoSuchElementException e) {
+                // just ignore the line if invalid.
+            }
+        }
+    }
+
+
+}
+
+
+/**
+ * Utility class for examining content information written out
+ * by a DataHandler object.  This stream gathers statistics on
+ * the stream so it can make transfer encoding determinations.
+ */
+class ContentCheckingOutputStream extends OutputStream {
+    private int asciiChars = 0;
+    private int nonAsciiChars = 0;
+    private boolean containsLongLines = false;
+    private boolean containsMalformedEOL = false;
+    private int previousChar = 0;
+    private int span = 0;
+
+    ContentCheckingOutputStream() {
+    }
+
+    public void write(byte[] data) throws IOException {
+        write(data, 0, data.length);
+    }
+
+    public void write(byte[] data, int offset, int length) throws IOException {
+        for (int i = 0; i < length; i++) {
+            write(data[offset + i]);
+        }
+    }
+
+    public void write(int ch) {
+        // we found a linebreak.  Reset the line length counters on either one.  We don't
+        // really need to validate here.
+        if (ch == '\n' || ch == '\r') {
+            // we found a newline, this is only valid if the previous char was the '\r'
+            if (ch == '\n') {
+                // malformed linebreak?  force this to base64 encoding.
+                if (previousChar != '\r') {
+                    containsMalformedEOL = true;
+                }
+            }
+            // hit a line end, reset our line length counter
+            span = 0;
+        }
+        else {
+            span++;
+            // the text has long lines, we can't transfer this as unencoded text.
+            if (span > 998) {
+                containsLongLines = true;
+            }
+
+            // non-ascii character, we have to transfer this in binary.
+            if (!ASCIIUtil.isAscii(ch)) {
+                nonAsciiChars++;
+            }
+            else {
+                asciiChars++;
+            }
+        }
+        previousChar = ch;
+    }
+
+
+    public String getBinaryTransferEncoding() {
+        if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
+            return "base64";
+        }
+        else {
+            return "7bit";
+        }
+    }
+
+    public String getTextTransferEncoding() {
+        // looking good so far, only valid chars here.
+        if (nonAsciiChars == 0) {
+            // does this contain long text lines?  We need to use a Q-P encoding which will
+            // be only slightly longer, but handles folding the longer lines.
+            if (containsLongLines) {
+                return "quoted-printable";
+            }
+            else {
+                // ideal!  Easiest one to handle.
+                return "7bit";
+            }
+        }
+        else {
+            // mostly characters requiring encoding?  Base64 is our best bet.
+            if (nonAsciiChars > asciiChars) {
+                return "base64";
+            }
+            else {
+                // Q-P encoding will use fewer bytes than the full Base64.
+                return "quoted-printable";
             }
-        } catch (SecurityException e) {
-                // ignore
         }
-        return "utf-8";
     }
 }

Modified: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/NewsAddress.java
URL: http://svn.apache.org/viewcvs/geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/NewsAddress.java?rev=381393&r1=381392&r2=381393&view=diff
==============================================================================
--- geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/NewsAddress.java (original)
+++ geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/NewsAddress.java Mon Feb 27 09:38:03 2006
@@ -20,8 +20,11 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.StringTokenizer;
+
 import javax.mail.Address;
 
+import sun.security.provider.Sun;
+
 /**
  * A representation of an RFC1036 Internet newsgroup address.
  *
@@ -75,7 +78,8 @@
     }
 
     public String toString() {
-        return host == null ? newsgroup : newsgroup + "@" + host;
+        // Sun impl only appears to return the newsgroup name, no host.
+        return newsgroup;
     }
 
     public boolean equals(Object o) {
@@ -92,7 +96,7 @@
 
     public int hashCode() {
         int result;
-        result = (host != null ? host.hashCode() : 0);
+        result = (host != null ? host.toLowerCase().hashCode() : 0);
         result = 29 * result + (newsgroup != null ? newsgroup.hashCode() : 0);
         return result;
     }
@@ -135,7 +139,7 @@
         if (addresses.length == 0) {
             return "";
         }
-        
+
         StringBuffer result = new StringBuffer(addresses.length * 32);
         result.append(addresses[0]);
         for (int i = 1; i < addresses.length; i++) {

Modified: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/ParameterList.java
URL: http://svn.apache.org/viewcvs/geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/ParameterList.java?rev=381393&r1=381392&r2=381393&view=diff
==============================================================================
--- geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/ParameterList.java (original)
+++ geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/javax/mail/internet/ParameterList.java Mon Feb 27 09:38:03 2006
@@ -25,6 +25,11 @@
 import java.util.StringTokenizer;
 import java.util.List;
 import java.util.ArrayList;// Represents lists in things like
+
+import org.apache.geronimo.mail.util.ASCIIUtil;
+import org.apache.geronimo.mail.util.RFC2231Encoder;
+import org.apache.geronimo.mail.util.SessionUtil;
+
 // Content-Type: text/plain;charset=klingon
 //
 // The ;charset=klingon is the parameter list, may have more of them with ';'
@@ -33,44 +38,125 @@
  * @version $Rev$ $Date$
  */
 public class ParameterList {
+    private static final String MIME_ENCODEPARAMETERS = "mail.mime.encodeparameters";
+    private static final String MIME_DECODEPARAMETERS = "mail.mime.decodeparameters";
+    private static final String MIME_DECODEPARAMETERS_STRICT = "mail.mime.decodeparameters.strict";
+
+    private static final int HEADER_SIZE_LIMIT = 76;
+
     private Map _parameters = new HashMap();
 
+    private boolean encodeParameters = false;
+    private boolean decodeParameters = false;
+    private boolean decodeParametersStrict = false;
+
     public ParameterList() {
+        // figure out how parameter handling is to be performed.
+        getInitialProperties();
     }
 
     public ParameterList(String list) throws ParseException {
-        if (list == null) {
-            return;
-        } else {
-            String[] tokens = split(list,';');
-            for (int i=0;tokens != null && i<tokens.length;i++) {
-                String parameter = tokens[i];
-                int eq = parameter.indexOf("=");
-                if (eq == -1) {
-                    throw new ParseException(parameter);
-                } else {
-                    String name = parameter.substring(0, eq);
-                    String value = parameter.substring(eq + 1).trim();
-                    if (value.charAt(0) == '"') {
-                        int end = value.lastIndexOf('"');
-                        value = value.substring(1, end);
+        // figure out how parameter handling is to be performed.
+        getInitialProperties();
+        // get a token parser for the type information
+        HeaderTokenizer tokenizer = new HeaderTokenizer(list, HeaderTokenizer.MIME);
+        while (true) {
+            HeaderTokenizer.Token token = tokenizer.next();
+
+            switch (token.getType()) {
+                // the EOF token terminates parsing.
+                case HeaderTokenizer.Token.EOF:
+                    return;
+
+                // each new parameter is separated by a semicolon, including the first, which separates
+                // the parameters from the main part of the header.
+                case ';':
+                    // the next token needs to be a parameter name
+                    token = tokenizer.next();
+                    // allow a trailing semicolon on the parameters.
+                    if (token.getType() == HeaderTokenizer.Token.EOF) {
+                        return;
                     }
-                    set(name, value);
-                }
+
+                    if (token.getType() != HeaderTokenizer.Token.ATOM) {
+                        throw new ParseException("Invalid parameter name: " + token.getValue());
+                    }
+
+                    // get the parameter name as a lower case version for better mapping.
+                    String name = token.getValue().toLowerCase();
+
+                    token = tokenizer.next();
+
+                    // parameters are name=value, so we must have the "=" here.
+                    if (token.getType() != '=') {
+                        throw new ParseException("Missing '='");
+                    }
+
+                    // now the value, which may be an atom or a literal
+                    token = tokenizer.next();
+
+                    if (token.getType() != HeaderTokenizer.Token.ATOM && token.getType() != HeaderTokenizer.Token.QUOTEDSTRING) {
+                        throw new ParseException("Invalid parameter value: " + token.getValue());
+                    }
+
+                    String value = token.getValue();
+                    String encodedValue = null;
+
+                    // we might have to do some additional decoding.  A name that ends with "*"
+                    // is marked as being encoded, so if requested, we decode the value.
+                    if (decodeParameters && name.endsWith("*")) {
+                        // the name needs to be pruned of the marker, and we need to decode the value.
+                        name = name.substring(0, name.length() - 1);
+                        // get a new decoder
+                        RFC2231Encoder decoder = new RFC2231Encoder(HeaderTokenizer.MIME);
+
+                        try {
+                            // decode the value
+                            encodedValue = decoder.decode(value);
+                        } catch (Exception e) {
+                            // if we're doing things strictly, then raise a parsing exception for errors.
+                            // otherwise, leave the value in its current state.
+                            if (decodeParametersStrict) {
+                                throw new ParseException("Invalid RFC2231 encoded parameter");
+                            }
+                        }
+                    }
+                    _parameters.put(name, new ParameterValue(name, value, encodedValue));
+
+                    break;
+
+                default:
+                    throw new ParseException("Missing ';'");
+
             }
         }
     }
 
+    /**
+     * Get the initial parameters that control parsing and values.
+     * These parameters are controlled by System properties.
+     */
+    private void getInitialProperties() {
+        decodeParameters = SessionUtil.getBooleanProperty(MIME_DECODEPARAMETERS, false);
+        decodeParametersStrict = SessionUtil.getBooleanProperty(MIME_DECODEPARAMETERS_STRICT, false);
+        encodeParameters = SessionUtil.getBooleanProperty(MIME_ENCODEPARAMETERS, false);
+    }
+
     public int size() {
         return _parameters.size();
     }
 
     public String get(String name) {
-        return (String) _parameters.get(name);
+        ParameterValue value = (ParameterValue)_parameters.get(name.toLowerCase());
+        if (value != null) {
+            return value.value;
+        }
+        return null;
     }
 
     public void set(String name, String value) {
-        _parameters.put(name.trim(), value.trim());
+        name = name.toLowerCase();
+        _parameters.put(name, new ParameterValue(name, value));
     }
 
     public void remove(String name) {
@@ -82,74 +168,98 @@
     }
 
     public String toString() {
-        Iterator it = _parameters.entrySet().iterator();
-        StringBuffer result = new StringBuffer();
-        while (it.hasNext()) {
-            Map.Entry entry = (Map.Entry) it.next();
-            result.append(";");
-            String key = (String)entry.getKey();
-            // we occasionally end up with null entries.  If we encounter one, just skip over it.
-            if (key == null || key.length() == 0) {
-                continue;
-            }
-            result.append(key);
-            result.append("=");
-            // this could contain special characters, so make sure it gets quoted if required.
-            result.append(MimeUtility.quote((String)entry.getValue(), HeaderTokenizer.MIME));
-        }
-        return result.toString();
-    }
-
-    private static String[] split(String str, char separatorChar) {
-        if (str == null) {
-            return null;
-        }
-        int len = str.length();
-        if (len == 0) {
-            return new String[0];
-        }
-        List list = new ArrayList();
-        int i = 0, start = 0;
-        boolean match = false;
-        while (i < len) {
-            char ch = str.charAt(i);
-            // Skip any separatorChar within quotes
-            if(ch == '\"') {
-                i++;
-                while(i < len && str.charAt(i) != '\"'){
-                    i++;
+        // we need to perform folding, but our starting point is 0.
+        return toString(0);
+    }
+
+    public String toString(int used) {
+        StringBuffer stringValue = new StringBuffer();
+
+        Iterator values = _parameters.values().iterator();
+
+        while (values.hasNext()) {
+            ParameterValue parm = (ParameterValue)values.next();
+            // get the values we're going to encode in here.
+            String name = parm.getEncodedName();
+            String value = parm.toString();
+
+            // add the semicolon separator.  We also add a blank so that folding/unfolding rules can be used.
+            stringValue.append("; ");
+            used += 2;
+
+            // too big for the current header line?
+            if ((used + name.length() + value.length() + 1) > HEADER_SIZE_LIMIT) {
+                // and a CRLF-whitespace combo.
+                stringValue.append("\r\n ");
+                // reset the counter for a fresh line
+                used = 3;
+            }
+            // now add the keyword/value pair.
+            stringValue.append(name);
+            stringValue.append("=");
+
+            used += name.length() + 1;
+
+            // we're not out of the woods yet.  It is possible that the keyword/value pair by itself might
+            // be too long for a single line.  If that's the case, then we need to fold the value, if possible
+            if (used + value.length() > HEADER_SIZE_LIMIT) {
+                String foldedValue = ASCIIUtil.fold(used, value);
+
+                stringValue.append(foldedValue);
+
+                // now we need to sort out how much of the current line is in use.
+                int lastLineBreak = foldedValue.lastIndexOf('\n');
+
+                if (lastLineBreak != -1) {
+                    used = foldedValue.length() - lastLineBreak + 1;
                 }
-                if(i < len){
-                    i++;
-                    continue;
+                else {
+                    used += foldedValue.length();
                 }
             }
-            if (ch == separatorChar) {
-                if (match) {
-                    list.add(str.substring(start, i));
-                    match = false;
-                }
-                start = ++i;
-                continue;
+            else {
+                // no folding required, just append.
+                stringValue.append(value);
+                used += value.length();
             }
-            match = true;
-            i++;
         }
-        if (match) {
-            list.add(str.substring(start, i));
+
+        return stringValue.toString();
+    }
+
+
+    /**
+     * Utility class for representing parameter values in the list.
+     */
+    class ParameterValue {
+        public String name;              // the name of the parameter
+        public String value;             // the original set value
+        public String encodedValue;      // an encoded value, if encoding is requested.
+
+        public ParameterValue(String name, String value) {
+            this.name = name;
+            this.value = value;
+            this.encodedValue = null;
+        }
+
+        public ParameterValue(String name, String value, String encodedValue) {
+            this.name = name;
+            this.value = value;
+            this.encodedValue = encodedValue;
         }
-        return (String[]) list.toArray(new String[list.size()]);
-    }
-
-    public String toString(int lineBreak) {
-        // figure out where to break the line
-        String answer = toString();
-        if (answer.length() > lineBreak) {
-            // convert it to substring
-            // TODO Implement
-            return "";
-        } else {
-            return answer;
+
+        public String toString() {
+            if (encodedValue != null) {
+                return MimeUtility.quote(encodedValue, HeaderTokenizer.MIME);
+            }
+            return MimeUtility.quote(value, HeaderTokenizer.MIME);
+        }
+
+        public String getEncodedName() {
+            if (encodedValue != null) {
+                return name + "*";
+            }
+            return name;
         }
     }
 }

Added: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/ASCIIUtil.java
URL: http://svn.apache.org/viewcvs/geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/ASCIIUtil.java?rev=381393&view=auto
==============================================================================
--- geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/ASCIIUtil.java (added)
+++ geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/ASCIIUtil.java Mon Feb 27 09:38:03 2006
@@ -0,0 +1,471 @@
+/**
+ *
+ * Copyright 2003-2004 The Apache Software Foundation
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.geronimo.mail.util;
+
+import java.io.BufferedInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+
+/**
+ * Set of utility methods for handling common encoding-related
+ * manipulations: ASCII tests, transfer-encoding selection, and
+ * folding/unfolding of text.
+ */
+public class ASCIIUtil {
+    // name of the property (read through SessionUtil.getBooleanProperty with a default of
+    // true) that switches folding and unfolding on or off.
+    private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
+    // once the used length plus the text length exceeds this, fold() looks for a break point.
+    private static final int FOLD_THRESHOLD = 76;
+
+    /**
+     * Test to see if this string contains only US-ASCII (i.e., 7-bit
+     * ASCII) characters, as judged by {@link #isAscii(int)}.
+     *
+     * @param s      The test string.
+     *
+     * @return true if this is a valid 7-bit ASCII encoding, false if it
+     *         contains any non-US ASCII characters.
+     */
+    static public boolean isAscii(String s) {
+        for (int i = 0; i < s.length(); i++) {
+            if (!isAscii(s.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Test to see if a given character can be considered "valid" ASCII.
+     * The excluded characters are the control characters less than
+     * 32 and everything from 127 (DEL) upwards, EXCEPT the CR, LF and
+     * tab characters, which ARE considered valid (all less than 32).
+     *
+     * @param ch     The test character.
+     *
+     * @return true if this character meets the "ascii-ness" criteria, false
+     *         otherwise.
+     */
+    static public boolean isAscii(int ch) {
+        // these are explicitly considered valid.
+        if (ch == '\r' || ch == '\n' || ch == '\t') {
+            return true;
+        }
+
+        // anything else outside the range is just plain wrong.
+        if (ch >= 127 || ch < 32) {
+            return false;
+        }
+        return true;
+    }
+
+
+    /**
+     * Examine a stream of text and make a judgement on what encoding
+     * type should be used for the text.  Ideally, we want to use 7bit
+     * encoding, but we may need to use either quoted-printable
+     * or base64.  The choice is made on the ratio of 7-bit characters to non-7bit.
+     * The stream is read to EOF and is not closed.
+     *
+     * @param content     An input stream for the content we're examining.
+     *
+     * @return "7bit" when every character is valid ASCII and no line is longer
+     *         than 998 characters, "base64" when the non-ASCII characters
+     *         outnumber the ASCII ones, and "quoted-printable" otherwise.
+     * @exception IOException
+     */
+    public static String getTextTransferEncoding(InputStream content) throws IOException {
+
+        // for efficiency, we'll read in blocks.
+        BufferedInputStream in = new BufferedInputStream(content, 4096);
+
+        int span = 0;            // span of characters without a line break.
+        boolean containsLongLines = false;
+        int asciiChars = 0;
+        int nonAsciiChars = 0;
+
+        while (true) {
+            int ch = in.read();
+            // if we hit an EOF here, go decide what type we've actually found.
+            if (ch == -1) {
+                break;
+            }
+
+            // we found a linebreak.  Reset the line length counters on either one.  We don't
+            // really need to validate here.
+            if (ch == '\n' || ch == '\r') {
+                // hit a line end, reset our line length counter
+                span = 0;
+            }
+            else {
+                span++;
+                // the text has long lines, we can't transfer this as unencoded text.
+                if (span > 998) {
+                    containsLongLines = true;
+                }
+
+                // keep a tally of both kinds; the ratio picks the encoding below.
+                if (!isAscii(ch)) {
+                    nonAsciiChars++;
+                }
+                else {
+                    asciiChars++;
+                }
+            }
+        }
+
+        // looking good so far, only valid chars here.
+        if (nonAsciiChars == 0) {
+            // does this contain long text lines?  We need to use a Q-P encoding which will
+            // be only slightly longer, but handles folding the longer lines.
+            if (containsLongLines) {
+                return "quoted-printable";
+            }
+            else {
+                // ideal!  Easiest one to handle.
+                return "7bit";
+            }
+        }
+        else {
+            // mostly characters requiring encoding?  Base64 is our best bet.
+            if (nonAsciiChars > asciiChars) {
+                return "base64";
+            }
+            else {
+                // Q-P encoding will use fewer bytes than the full Base64.
+                return "quoted-printable";
+            }
+        }
+    }
+
+
+    /**
+     * Examine a string of text and make a judgement on what encoding
+     * type should be used for the text.  Ideally, we want to use 7bit
+     * encoding, but we may need to use either quoted-printable
+     * or base64.  The choice is made on the ratio of 7-bit characters to non-7bit.
+     * Unlike the stream version, line lengths are not examined here.
+     *
+     * @param content     A string for the content we're examining.
+     *
+     * @return "7bit" when every character is valid ASCII, "base64" when the
+     *         non-ASCII characters outnumber the ASCII ones, and
+     *         "quoted-printable" otherwise.
+     */
+    public static String getTextTransferEncoding(String content) {
+
+        int asciiChars = 0;
+        int nonAsciiChars = 0;
+
+        for (int i = 0; i < content.length(); i++) {
+            int ch = content.charAt(i);
+
+            // keep a tally of both kinds; the ratio picks the encoding below.
+            if (!isAscii(ch)) {
+                nonAsciiChars++;
+            }
+            else {
+                asciiChars++;
+            }
+        }
+
+        // looking good so far, only valid chars here.
+        if (nonAsciiChars == 0) {
+            // ideal!  Easiest one to handle.
+            return "7bit";
+        }
+        else {
+            // mostly characters requiring encoding?  Base64 is our best bet.
+            if (nonAsciiChars > asciiChars) {
+                return "base64";
+            }
+            else {
+                // Q-P encoding will use fewer bytes than the full Base64.
+                return "quoted-printable";
+            }
+        }
+    }
+
+
+    /**
+     * Determine if the content looks like it might be
+     * valid ascii text, and thus transferable as 7bit code.  In
+     * order for this to be true, all characters must be valid
+     * 7-bit ASCII code AND every '\n' must be preceded by a '\r'.
+     * 7-bit transfers also
+     * typically have a line limit of 1000 bytes (998 + the CRLF), so any
+     * stretch of characters longer than that will also force Base64 encoding.
+     * The scan stops at the first character that forces base64; the
+     * stream is not closed.
+     *
+     * NOTE(review): a '\r' that is not followed by '\n' is accepted by
+     * this scan -- confirm whether that is intended.
+     *
+     * @param content     An input stream for the content we're examining.
+     *
+     * @return "7bit" if the whole stream passed the checks, "base64" otherwise.
+     * @exception IOException
+     */
+    public static String getBinaryTransferEncoding(InputStream content) throws IOException {
+
+        // for efficiency, we'll read in blocks.
+        BufferedInputStream in = new BufferedInputStream(content, 4096);
+
+        int previousChar = 0;
+        int span = 0;            // span of characters without a line break.
+
+        while (true) {
+            int ch = in.read();
+            // if we hit an EOF here, we've only found valid text so far, so we can transfer this as
+            // 7-bit ascii.
+            if (ch == -1) {
+                return "7bit";
+            }
+
+            // we found a newline, this is only valid if the previous char was the '\r'
+            if (ch == '\n') {
+                // malformed linebreak?  force this to base64 encoding.
+                if (previousChar != '\r') {
+                    return "base64";
+                }
+                // hit a line end, reset our line length counter
+                span = 0;
+            }
+            else {
+                span++;
+                // the text has long lines, we can't transfer this as unencoded text.
+                if (span > 998) {
+                    return "base64";
+                }
+
+                // non-ascii character, we have to transfer this in binary.
+                if (!isAscii(ch)) {
+                    return "base64";
+                }
+            }
+            previousChar = ch;
+        }
+    }
+
+
+    /**
+     * Perform text folding on a string of text: trailing blanks and tabs
+     * are removed and, when the text does not fit within the fold
+     * threshold, a CRLF is inserted in front of a whitespace character.
+     *
+     * NOTE(review): embedded '\n' and '\r' characters met while scanning
+     * for a break point are appended to the output immediately, i.e.
+     * ahead of the text that precedes them, and a CRLF pair is written
+     * as backslash, LF, CR.  That looks unintended; the logic is left
+     * untouched here and should be confirmed against the callers.
+     *
+     * @param used   The amount of text already "used up" on this line.  This is
+     *               typically the length of a message header that this text
+     *               is getting added to.
+     * @param s      The text to fold.
+     *
+     * @return The input text, with linebreaks inserted at appropriate fold points.
+     */
+    public static String fold(int used, String s) {
+        // if folding is disabled, unfolding is also.  Return the string unchanged.
+        if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
+            return s;
+        }
+
+        int end;
+
+        // now we need to strip off any trailing "whitespace", where whitespace is blanks
+        // and tabs (line break characters are NOT stripped by this loop).
+        for (end = s.length() - 1; end >= 0; end--) {
+            int ch = s.charAt(end);
+            if (ch != ' ' && ch != '\t' ) {
+                break;
+            }
+        }
+
+        // did we actually find something to remove?  Shorten the String to the trimmed length
+        if (end != s.length() - 1) {
+            s = s.substring(0, end + 1);
+        }
+
+        // does the string as it exists now not require folding?  We can just hand that back right off.
+        if (s.length() + used <= FOLD_THRESHOLD) {
+            return s;
+        }
+
+        // get a buffer for the length of the string, plus room for a few line breaks.
+        // these are soft line breaks, so we generally need more than just the line breaks (an escape +
+        // CR + LF + leading space on next line);
+        StringBuffer newString = new StringBuffer(s.length() + 8);
+
+
+        // now keep chopping this down until we've accomplished what we need.
+        while (used + s.length() > FOLD_THRESHOLD) {
+            int breakPoint = -1;
+            char breakChar = 0;
+
+            // now scan for the next place where we can break.
+            for (int i = 0; i < s.length(); i++) {
+                // have we passed the fold limit?
+                if (used + i > FOLD_THRESHOLD) {
+                    // if we've already seen a blank, then stop now.  Otherwise
+                    // we keep going until we hit a fold point.
+                    if (breakPoint != -1) {
+                        break;
+                    }
+                }
+                char ch = s.charAt(i);
+
+                // a white space character?
+                if (ch == ' ' || ch == '\t') {
+                    // this might be a run of white space, so skip over those now.
+                    breakPoint = i;
+                    // we need to maintain the same character type after the inserted linebreak.
+                    breakChar = ch;
+                    i++;
+                    while (i < s.length()) {
+                        ch = s.charAt(i);
+                        if (ch != ' ' && ch != '\t') {
+                            break;
+                        }
+                        i++;
+                    }
+                }
+                // found an embedded new line.  Escape this so that the unfolding process preserves it.
+                else if (ch == '\n') {
+                    newString.append('\\');
+                    newString.append('\n');
+                }
+                else if (ch == '\r') {
+                    newString.append('\\');
+                    newString.append('\n');
+                    i++;
+                    // if this is a CRLF pair, add the second char also
+                    if (i < s.length() && s.charAt(i) == '\n') {
+                        newString.append('\r');
+                    }
+                }
+
+            }
+            // no fold point found, we punt, append the remainder and leave.
+            if (breakPoint == -1) {
+                newString.append(s);
+                return newString.toString();
+            }
+            newString.append(s.substring(0, breakPoint));
+            newString.append("\r\n");
+            newString.append(breakChar);
+            // chop the string
+            s = s.substring(breakPoint + 1);
+            // start again, and we've used the first char of the limit already with the whitespace char.
+            used = 1;
+        }
+
+        // add on the remainder, and return
+        newString.append(s);
+        return newString.toString();
+    }
+
+    /**
+     * Unfold a folded string.  The unfolding process will remove
+     * any line breaks that are not escaped and which are also followed
+     * by whitespace characters (blank or tab); the line break and the
+     * run of whitespace behind it are replaced by a single blank.  A
+     * backslash in front of a line break is removed and the line break
+     * itself is kept; any other backslash is copied unchanged.
+     *
+     * @param s      The folded string.
+     *
+     * @return A new string with unfolding rules applied, or the original
+     *         string if folding is disabled or there are no line breaks.
+     */
+    public static String unfold(String s) {
+        // if folding is disabled, unfolding is also.  Return the string unchanged.
+        if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
+            return s;
+        }
+
+        // if there are no line break characters in the string, we can just return this.
+        if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
+            return s;
+        }
+
+        // we need to scan and fix things up.
+        int length = s.length();
+
+        StringBuffer newString = new StringBuffer(length);
+
+        // scan the entire string
+        for (int i = 0; i < length; i++) {
+            // this has to be a char:  appending an int to a StringBuffer adds the
+            // decimal digits of the value rather than the character itself.
+            char ch = s.charAt(i);
+
+            // we have a backslash.  In folded strings, escape characters are only processed as such if
+            // they precede line breaks.  Otherwise, we leave it be.
+            if (ch == '\\') {
+                // escape at the very end?  Just add the character.
+                if (i == length - 1) {
+                    newString.append(ch);
+                }
+                else {
+                    char nextChar = s.charAt(i + 1);
+
+                    // naked newline?  Add the new line to the buffer, and skip the escape char.
+                    if (nextChar == '\n') {
+                        newString.append('\n');
+                        i++;
+                    }
+                    else if (nextChar == '\r') {
+                        // just the CR left (no LF behind it)?  Add it, removing the escape.
+                        if (i == length - 2 || s.charAt(i + 2) != '\n') {
+                            newString.append('\r');
+                            i++;
+                        }
+                        else {
+                            // toss the escape, add both parts of the CRLF, and skip over two chars.
+                            newString.append('\r');
+                            newString.append('\n');
+                            i += 2;
+                        }
+                    }
+                    else {
+                        // an escape for another purpose, just copy it over.
+                        newString.append(ch);
+                    }
+                }
+            }
+            // we have an unescaped line break
+            else if (ch == '\n' || ch == '\r') {
+                boolean crlf = false;
+
+                if (ch == '\r') {
+                    // check to see if we need to step over the LF of a CRLF pair.
+                    if (i < length - 1 && s.charAt(i + 1) == '\n') {
+                        i++;
+                        // flag the type so we know what we might need to preserve.
+                        crlf = true;
+                    }
+                }
+
+                // get a temp position scanner.
+                int scan = i + 1;
+
+                // does whitespace follow this new line?  we need to scrap the new line and reduce the
+                // leading whitespace down to a single blank.  fold() may continue a line with either
+                // a blank or a tab, so both are accepted here.
+                if (scan < length && isFoldWhitespace(s.charAt(scan))) {
+                    // add the character
+                    newString.append(' ');
+
+                    // scan over the rest of the whitespace
+                    i = scan + 1;
+                    while (i < length && isFoldWhitespace(s.charAt(i))) {
+                        i++;
+                    }
+                    // the loop increments again, so back up to the last whitespace as the current char.
+                    i--;
+                }
+                else {
+                    // we must keep this line break.  Append the appropriate style.
+                    if (crlf) {
+                        newString.append("\r\n");
+                    }
+                    else {
+                        newString.append(ch);
+                    }
+                }
+            }
+            else {
+                // just a normal, ordinary character
+                newString.append(ch);
+            }
+        }
+        return newString.toString();
+    }
+
+    /**
+     * Test for the whitespace characters that can start a folded
+     * continuation line.
+     *
+     * @param ch     The character to test.
+     *
+     * @return true for a blank or a tab, false for anything else.
+     */
+    private static boolean isFoldWhitespace(char ch) {
+        return ch == ' ' || ch == '\t';
+    }
+}

Propchange: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/ASCIIUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/ASCIIUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/ASCIIUtil.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64.java
URL: http://svn.apache.org/viewcvs/geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64.java?rev=381393&r1=381392&r2=381393&view=diff
==============================================================================
--- geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64.java (original)
+++ geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64.java Mon Feb 27 09:38:03 2006
@@ -132,4 +132,19 @@
     {
         return encoder.decode(data, out);
     }
+
+    /**
+     * Decode base 64 encoded data held in a byte array, writing the result
+     * to the given output stream.  The whole array is handed to
+     * encoder.decode(); per the original note on this method, whitespace
+     * characters in the data are ignored.
+     *
+     * @param data   The array data to decode.
+     * @param out    The output stream for the data.
+     *
+     * @return the number of bytes produced.
+     * @exception IOException
+     */
+    public static int decode(byte [] data, OutputStream out) throws IOException
+    {
+        final int length = data.length;
+        return encoder.decode(data, 0, length, out);
+    }
 }

Added: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64DecoderStream.java
URL: http://svn.apache.org/viewcvs/geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64DecoderStream.java?rev=381393&view=auto
==============================================================================
--- geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64DecoderStream.java (added)
+++ geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64DecoderStream.java Mon Feb 27 09:38:03 2006
@@ -0,0 +1,208 @@
+/**
+ *
+ * Copyright 2003-2004 The Apache Software Foundation
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.geronimo.mail.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.FilterInputStream;
+
+/**
+ * An implementation of a FilterInputStream that decodes the
+ * stream data in BASE64 encoding format.  This version does the
+ * decoding "on the fly" rather than decoding a single block of
+ * data.  Since this version is intended for use by the MimeUtility class,
+ * it also handles line breaks in the encoded data (any character the
+ * decoder does not accept as Base64 is skipped while reading).
+ */
+public class Base64DecoderStream extends FilterInputStream {
+
+    // name of the property that turns off the truncated-data check.
+    static protected final String MAIL_BASE64_IGNOREERRORS = "mail.mime.base64.ignoreerrors";
+
+    // number of decodeable units we'll try to process at one time.  We'll attempt to read that much
+    // data from the input stream and decode in blocks.
+    static protected final int BUFFERED_UNITS = 2000;
+
+    // our decoder for processing the data
+    protected Base64Encoder decoder = new Base64Encoder();
+
+    // can be overridden by a property (see the constructor).
+    protected boolean ignoreErrors = false;
+
+    // buffer for reading in chars for decoding (which can support larger bulk reads).
+    // Each unit is 4 encoded characters.
+    protected byte[] encodedChars = new byte[BUFFERED_UNITS * 4];
+    // buffer for the decoded form of a full encodedChars buffer (3 bytes per unit).
+    protected byte[] decodedChars = new byte[BUFFERED_UNITS * 3];
+    // count of decoded bytes in the buffer that have not been handed out yet
+    protected int decodedCount = 0;
+    // index of the next decoded byte to hand out
+    protected int decodedIndex = 0;
+
+
+    /**
+     * Create a decoding stream on top of a stream of Base64 data.
+     *
+     * @param in     The source stream of encoded data.
+     */
+    public Base64DecoderStream(InputStream in) {
+        super(in);
+        // make sure we get the ignore errors flag
+        ignoreErrors = SessionUtil.getBooleanProperty(MAIL_BASE64_IGNOREERRORS, false);
+    }
+
+    /**
+     * Test for the existence of decoded characters in our buffer
+     * of decoded data.
+     *
+     * @return True if we currently have buffered characters.
+     */
+    private boolean dataAvailable() {
+        return decodedCount != 0;
+    }
+
+    /**
+     * Get the next buffered decoded byte.  The caller must have checked
+     * dataAvailable() first.
+     *
+     * @return The next decoded byte in the buffer as an unsigned value
+     *         in the range 0-255.
+     */
+    private int getBufferedChar() {
+        decodedCount--;
+        // mask off the sign extension:  a raw byte of 0xFF would otherwise be returned
+        // as -1 and be mistaken for EOF by the caller of read().
+        return decodedChars[decodedIndex++] & 0xff;
+    }
+
+    /**
+     * Refill the decoded data buffer from the input stream.
+     *
+     * @return true if we were able to obtain more data, false otherwise.
+     */
+    private boolean decodeStreamData() throws IOException {
+        decodedIndex = 0;
+
+        // fill up a data buffer with input data
+        int readCharacters = fillEncodedBuffer();
+
+        if (readCharacters > 0) {
+            decodedCount =  decoder.decode(encodedChars, 0, readCharacters, decodedChars);
+            return true;
+        }
+        return false;
+    }
+
+
+    /**
+     * Retrieve a single byte from the decoded characters buffer.
+     *
+     * @return The decoded byte (0-255) or -1 if there was an EOF condition.
+     */
+    private int getByte() throws IOException {
+        // loop rather than test once, in case a decode pass yields no bytes.
+        while (!dataAvailable()) {
+            if (!decodeStreamData()) {
+                return -1;
+            }
+        }
+        return getBufferedChar();
+    }
+
+    /**
+     * Copy decoded data into a caller's buffer, decoding more of the
+     * stream as required.
+     *
+     * @param data   The target buffer.
+     * @param offset The position in the target buffer of the first byte.
+     * @param length The number of bytes wanted.
+     *
+     * @return The number of bytes copied, or -1 if the stream was already
+     *         at EOF and nothing could be copied.
+     */
+    private int getBytes(byte[] data, int offset, int length) throws IOException {
+
+        int readCharacters = 0;
+        while (length > 0) {
+            // need data?  Try to get some
+            if (!dataAvailable()) {
+                // if we can't get this, return a count of how much we did get (which may be -1).
+                if (!decodeStreamData()) {
+                    return readCharacters > 0 ? readCharacters : -1;
+                }
+            }
+
+            // now copy some of the data from the decoded buffer (the source) to the
+            // target buffer (the destination).
+            int copyCount = Math.min(decodedCount, length);
+            System.arraycopy(decodedChars, decodedIndex, data, offset, copyCount);
+            decodedIndex += copyCount;
+            decodedCount -= copyCount;
+            offset += copyCount;
+            length -= copyCount;
+            readCharacters += copyCount;
+        }
+        return readCharacters;
+    }
+
+
+    /**
+     * Fill our buffer of input characters for decoding from the
+     * stream.  This will attempt to read a full buffer, but will
+     * terminate on an EOF or read error.  This will filter out
+     * non-Base64 encoding chars and will only return a valid
+     * multiple of 4 number of bytes.
+     *
+     * @return The count of characters read.
+     * @exception IOException if the data ends in the middle of a 4 character
+     *                        unit and errors are not being ignored.
+     */
+    private int fillEncodedBuffer() throws IOException
+    {
+        int readCharacters = 0;
+
+        while (true) {
+            // get the next character from the stream
+            int ch = in.read();
+            // did we hit an EOF condition?
+            if (ch == -1) {
+                // now check to see if this is normal, or potentially an error
+                // if we didn't get characters as a multiple of 4, we may need to complain about this.
+                if ((readCharacters % 4) != 0) {
+                    // the error checking can be turned off...normally it isn't
+                    if (!ignoreErrors) {
+                        throw new IOException("Base64 encoding error, data truncated");
+                    }
+                    // we're ignoring errors, so round down to a multiple and return that.
+                    return (readCharacters / 4) * 4;
+                }
+                // return the count.
+                return readCharacters;
+            }
+            // if this character is valid in a Base64 stream, copy it to the buffer.
+            else if (decoder.isValidBase64(ch)) {
+                encodedChars[readCharacters++] = (byte)ch;
+                // if we've filled up the buffer, time to quit.
+                if (readCharacters >= encodedChars.length) {
+                    return readCharacters;
+                }
+            }
+
+            // we're filtering out whitespace and CRLF characters, so just ignore these
+        }
+    }
+
+
+    // in order to function as a filter, these streams need to override the different
+    // read() signature.
+
+    /**
+     * Read a single decoded byte.
+     *
+     * @return The byte as a value in the range 0-255, or -1 at EOF.
+     */
+    public int read() throws IOException
+    {
+        return getByte();
+    }
+
+
+    /**
+     * Read decoded bytes into a portion of a buffer.
+     *
+     * @param buffer The target buffer.
+     * @param offset The position of the first byte to store.
+     * @param length The maximum number of bytes to read.
+     *
+     * @return The number of bytes stored, or -1 at EOF.
+     */
+    public int read(byte [] buffer, int offset, int length) throws IOException {
+        return getBytes(buffer, offset, length);
+    }
+
+
+    /**
+     * Mark/reset is not supported by this stream.
+     *
+     * @return Always false.
+     */
+    public boolean markSupported() {
+        return false;
+    }
+
+
+    /**
+     * Estimate the number of decoded bytes that can be read:  3 bytes for
+     * every 4 available on the source stream, plus whatever is already
+     * decoded and buffered.
+     *
+     * @return The estimated count of available bytes.
+     */
+    public int available() throws IOException {
+        return ((in.available() / 4) * 3) + decodedCount;
+    }
+}

Propchange: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64DecoderStream.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64DecoderStream.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: geronimo/specs/trunk/geronimo-spec-javamail/src/main/java/org/apache/geronimo/mail/util/Base64DecoderStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message