Return-Path: Delivered-To: apmail-james-mime4j-dev-archive@minotaur.apache.org Received: (qmail 18945 invoked from network); 31 Dec 2009 01:18:48 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 31 Dec 2009 01:18:48 -0000 Received: (qmail 94770 invoked by uid 500); 31 Dec 2009 01:18:48 -0000 Delivered-To: apmail-james-mime4j-dev-archive@james.apache.org Received: (qmail 94733 invoked by uid 500); 31 Dec 2009 01:18:48 -0000 Mailing-List: contact mime4j-dev-help@james.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mime4j-dev@james.apache.org Delivered-To: mailing list mime4j-dev@james.apache.org Received: (qmail 94706 invoked by uid 99); 31 Dec 2009 01:18:40 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 31 Dec 2009 01:18:40 +0000 X-ASF-Spam-Status: No, hits=-2.3 required=5.0 tests=AWL,BAYES_00 X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 31 Dec 2009 01:18:33 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 8D7C92388996; Thu, 31 Dec 2009 01:18:13 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r894750 - in /james/mime4j/branches/cycleclean/core/src: main/java/org/apache/james/mime4j/io/ main/java/org/apache/james/mime4j/parser/ test/java/org/apache/james/mime4j/message/ Date: Thu, 31 Dec 2009 01:18:13 -0000 To: mime4j-dev@james.apache.org From: bago@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20091231011813.8D7C92388996@eris.apache.org> Author: bago Date: Thu Dec 31 01:18:12 2009 New Revision: 894750 URL: http://svn.apache.org/viewvc?rev=894750&view=rev Log: Added malformedHeaderStartsBody boolean option to MimeEntityConfig (MIME4J-58) Added unread(ByteArrayBuffer) method to 
LineReaderInputStream, only implemented by BufferedLineReaderInputStream. MimeEntity parseField now optionally unreads the field (when in lenient mode). If the unread fails it throws an exception. Improved IllegalArgumentException handling in MimeBoundaryInputStream (the boundary vs buffer size check is more secure than before). MimeBoundaryInputStream.fillBuffer had a bug: (while i > 0) has been replaced with (while i > buffer.pos()). It was never hit because of the current "refill" behaviour. BufferedLineReaderInputStream now supports unread(ByteArrayBuffer): the unread simply puts the passed buffer in place of the original buffer and consumes it. The first time fillBuffer is called and the buffer is completely consumed the old buffer is restored. If fillBuffer is called before the buffer is consumed we throw an exception (this never happens with the current refill behaviour, but it protects future changes with an explicit exception). Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/BufferedLineReaderInputStream.java james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStream.java james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStreamAdaptor.java james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/AbstractEntity.java james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntity.java james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntityConfig.java james/mime4j/branches/cycleclean/core/src/test/java/org/apache/james/mime4j/message/MessageHeadlessParserTest.java Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/BufferedLineReaderInputStream.java URL: 
http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/BufferedLineReaderInputStream.java?rev=894750&r1=894749&r2=894750&view=diff ============================================================================== --- james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/BufferedLineReaderInputStream.java (original) +++ james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/BufferedLineReaderInputStream.java Thu Dec 31 01:18:12 2009 @@ -32,8 +32,13 @@ private boolean truncated; - private byte[] buffer; + boolean tempBuffer = false; + + private byte[] origBuffer; + private int origBufpos; + private int origBuflen; + private byte[] buffer; private int bufpos; private int buflen; @@ -65,7 +70,7 @@ private void expand(int newlen) { byte newbuffer[] = new byte[newlen]; - int len = this.buflen - this.bufpos; + int len = bufferLen(); if (len > 0) { System.arraycopy(this.buffer, this.bufpos, newbuffer, this.bufpos, len); } @@ -79,9 +84,21 @@ } public int fillBuffer() throws IOException { + if (tempBuffer) { + // we was on tempBuffer. + // check that we completed the tempBuffer + if (bufpos != buflen) throw new IllegalStateException("unread only works when a buffer is fully read before the next refill is asked!"); + // restore the original buffer + buffer = origBuffer; + buflen = origBuflen; + bufpos = origBufpos; + tempBuffer = false; + // return that we just read bufferLen data. + return bufferLen(); + } // compact the buffer if necessary - if (this.bufpos > 0) { - int len = this.buflen - this.bufpos; + if (this.bufpos > 0) { // could swtich to (this.buffer.length / 2) but needs a 4*boundary capacity, then (instead of 2). 
+ int len = bufferLen(); if (len > 0) { System.arraycopy(this.buffer, this.bufpos, this.buffer, 0, len); } @@ -100,8 +117,12 @@ } } + private int bufferLen() { + return this.buflen - this.bufpos; + } + public boolean hasBufferedData() { - return this.bufpos < this.buflen; + return bufferLen() > 0; } public void truncate() { @@ -109,11 +130,13 @@ this.truncated = true; } + protected boolean readAllowed() { + return !this.truncated; + } + @Override public int read() throws IOException { - if (this.truncated) { - return -1; - } + if (!readAllowed()) return -1; int noRead = 0; while (!hasBufferedData()) { noRead = fillBuffer(); @@ -126,9 +149,7 @@ @Override public int read(final byte[] b, int off, int len) throws IOException { - if (this.truncated) { - return -1; - } + if (!readAllowed()) return -1; if (b == null) { return 0; } @@ -139,7 +160,7 @@ return -1; } } - int chunk = this.buflen - this.bufpos; + int chunk = bufferLen(); if (chunk > len) { chunk = len; } @@ -150,9 +171,7 @@ @Override public int read(final byte[] b) throws IOException { - if (this.truncated) { - return -1; - } + if (!readAllowed()) return -1; if (b == null) { return 0; } @@ -170,9 +189,8 @@ if (dst == null) { throw new IllegalArgumentException("Buffer may not be null"); } - if (this.truncated) { - return -1; - } + if (!readAllowed()) return -1; + int total = 0; boolean found = false; int bytesRead = 0; @@ -207,7 +225,7 @@ } } - /** + /** * Implements quick search algorithm as published by *

* SUNDAY D.M., 1990, @@ -220,7 +238,7 @@ throw new IllegalArgumentException("Pattern may not be null"); } if (off < this.bufpos || len < 0 || off + len > this.buflen) { - throw new IndexOutOfBoundsException(); + throw new IndexOutOfBoundsException("looking for "+off+"("+len+")"+" in "+bufpos+"/"+buflen); } if (len < pattern.length) { return -1; @@ -284,43 +302,43 @@ } public int indexOf(byte b) { - return indexOf(b, this.bufpos, this.buflen - this.bufpos); + return indexOf(b, this.bufpos, bufferLen()); } public byte charAt(int pos) { if (pos < this.bufpos || pos > this.buflen) { - throw new IndexOutOfBoundsException(); + throw new IndexOutOfBoundsException("looking for "+pos+" in "+bufpos+"/"+buflen); } return this.buffer[pos]; } - public byte[] buf() { + protected byte[] buf() { return this.buffer; } - public int pos() { + protected int pos() { return this.bufpos; } - public int limit() { + protected int limit() { return this.buflen; } - public int length() { - return this.buflen - this.bufpos; + protected int length() { + return bufferLen(); } public int capacity() { return this.buffer.length; } - public int skip(int n) { - int chunk = Math.min(n, this.buflen - this.bufpos); + protected int skip(int n) { + int chunk = Math.min(n, bufferLen()); this.bufpos += chunk; return chunk; } - public void clear() { + private void clear() { this.bufpos = 0; this.buflen = 0; } @@ -342,4 +360,16 @@ return buffer.toString(); } + @Override + public boolean unread(ByteArrayBuffer buf) { + origBuffer = buffer; + origBuflen = buflen; + origBufpos = bufpos; + bufpos = 0; + buflen = buf.length(); + buffer = buf.buffer(); + tempBuffer = true; + return true; + } + } Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStream.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStream.java?rev=894750&r1=894749&r2=894750&view=diff 
============================================================================== --- james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStream.java (original) +++ james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStream.java Thu Dec 31 01:18:12 2009 @@ -47,5 +47,12 @@ */ public abstract int readLine(final ByteArrayBuffer dst) throws MaxLineLimitException, IOException; + + /** + * Tries to unread the last read line. + * + * @return true if the unread has been succesfull. + */ + public abstract boolean unread(ByteArrayBuffer buf); } Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStreamAdaptor.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStreamAdaptor.java?rev=894750&r1=894749&r2=894750&view=diff ============================================================================== --- james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStreamAdaptor.java (original) +++ james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/LineReaderInputStreamAdaptor.java Thu Dec 31 01:18:12 2009 @@ -117,4 +117,13 @@ public String toString() { return "[LineReaderInputStreamAdaptor: " + bis + "]"; } + + @Override + public boolean unread(ByteArrayBuffer buf) { + if (bis != null) { + return bis.unread(buf); + } else { + return false; + } + } } Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java?rev=894750&r1=894749&r2=894750&view=diff ============================================================================== --- 
james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java (original) +++ james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java Thu Dec 31 01:18:12 2009 @@ -51,7 +51,8 @@ public MimeBoundaryInputStream(BufferedLineReaderInputStream inbuffer, String boundary) throws IOException { super(inbuffer); - if (inbuffer.capacity() <= boundary.length()) { + + if (inbuffer.capacity() < boundary.length() * 2) { throw new IllegalArgumentException("Boundary is too long"); } this.buffer = inbuffer; @@ -72,6 +73,7 @@ } this.boundary[i + 2] = ch; } + fillBuffer(); } @@ -91,19 +93,25 @@ public boolean markSupported() { return false; } + + public boolean readAllowed() throws IOException { + if (completed) { + return false; + } + // System.out.println("rA!"); + if (endOfStream() && !hasData()) { + skipBoundary(); + return false; + } + return true; + } /** * @see java.io.InputStream#read() */ @Override public int read() throws IOException { - if (completed) { - return -1; - } - if (endOfStream() && !hasData()) { - skipBoundary(); - return -1; - } + if (!readAllowed()) return -1; for (;;) { if (hasData()) { return buffer.read(); @@ -117,13 +125,7 @@ @Override public int read(byte[] b, int off, int len) throws IOException { - if (completed) { - return -1; - } - if (endOfStream() && !hasData()) { - skipBoundary(); - return -1; - } + if (!readAllowed()) return -1; fillBuffer(); if (!hasData()) { return read(b, off, len); @@ -137,21 +139,15 @@ if (dst == null) { throw new IllegalArgumentException("Destination buffer may not be null"); } - if (completed) { - return -1; - } - if (endOfStream() && !hasData()) { - skipBoundary(); - return -1; - } - + if (!readAllowed()) return -1; + int total = 0; boolean found = false; int bytesRead = 0; while (!found) { if (!hasData()) { bytesRead = fillBuffer(); - if (!hasData() && endOfStream()) { + if (endOfStream() && !hasData()) { skipBoundary(); 
bytesRead = -1; break; @@ -179,7 +175,7 @@ } } - private boolean endOfStream() { + private boolean endOfStream() { return eof || atBoundary; } @@ -194,16 +190,18 @@ int bytesRead; if (!hasData()) { bytesRead = buffer.fillBuffer(); + if (bytesRead == -1) { + eof = true; + } } else { bytesRead = 0; } - eof = bytesRead == -1; int i = buffer.indexOf(boundary); // NOTE this currently check only for LF. It doesn't check for canonical CRLF // and neither for isolated CR. This will require updates according to MIME4J-60 - while (i > 0 && buffer.charAt(i-1) != '\n') { + while (i > buffer.pos() && buffer.charAt(i-1) != '\n') { // skip the "fake" boundary (it does not contain LF or CR so we cannot have // another boundary starting before this is complete. i = i + boundary.length; @@ -295,4 +293,9 @@ } return buffer.toString(); } + + @Override + public boolean unread(ByteArrayBuffer buf) { + return false; + } } Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/AbstractEntity.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/AbstractEntity.java?rev=894750&r1=894749&r2=894750&view=diff ============================================================================== --- james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/AbstractEntity.java (original) +++ james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/AbstractEntity.java Thu Dec 31 01:18:12 2009 @@ -160,6 +160,7 @@ headerCount++; // Strip away line delimiter + int origLen = fieldbuf.length(); int len = fieldbuf.length(); if (len > 0 && fieldbuf.byteAt(len - 1) == '\n') { len--; @@ -178,6 +179,12 @@ return true; } catch (MimeException e) { monitor(Event.INVALID_HEADER); + if (config.isMalformedHeaderStartsBody()) { + fieldbuf.setLength(origLen); + LineReaderInputStream instream = getDataStream(); + if (!instream.unread(fieldbuf)) throw new 
MimeParseEventException(Event.INVALID_HEADER); + return false; + } } } } Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntity.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntity.java?rev=894750&r1=894749&r2=894750&view=diff ============================================================================== --- james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntity.java (original) +++ james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntity.java Thu Dec 31 01:18:12 2009 @@ -168,6 +168,7 @@ private void createMimePartStream() throws MimeException, IOException { String boundary = body.getBoundary(); + // TODO move the following lines inside the MimeBoundaryInputStream constructor int bufferSize = 2 * boundary.length(); if (bufferSize < 4096) { bufferSize = 4096; Modified: james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntityConfig.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntityConfig.java?rev=894750&r1=894749&r2=894750&view=diff ============================================================================== --- james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntityConfig.java (original) +++ james/mime4j/branches/cycleclean/core/src/main/java/org/apache/james/mime4j/parser/MimeEntityConfig.java Thu Dec 31 01:18:12 2009 @@ -36,8 +36,9 @@ private long maxContentLen; private boolean countLineNumbers; private String defaultContentType; + private boolean malformedHeaderStartsBody; - public MimeEntityConfig() { + public MimeEntityConfig() { this.maximalBodyDescriptor = false; this.strictParsing = false; this.maxLineLen = 1000; @@ -49,6 +50,30 @@ } /** + * @see #setMalformedHeaderStartsBody(boolean) + * + * @return 
true if malformed header should "end" the headers and be + * part of the body + */ + public boolean isMalformedHeaderStartsBody() { + return malformedHeaderStartsBody; + } + + /** + * Define the behaviour for dealing with malformed headers while in lenient + * mode + * + * @param malformedHeaderStartsBody true to make the parser + * interpret a malformed header as end of the headers and + * as part of the body (as if the CRLF separator was missing). + * false to simply ignore malformed headers and + * continue parsing headers from the following line. + */ + public void setMalformedHeaderStartsBody(boolean malformedHeaderStartsBody) { + this.malformedHeaderStartsBody = malformedHeaderStartsBody; + } + + /** * Returns true if the maximum body descriptor should be * used, false for the default body descriptor. * Modified: james/mime4j/branches/cycleclean/core/src/test/java/org/apache/james/mime4j/message/MessageHeadlessParserTest.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/cycleclean/core/src/test/java/org/apache/james/mime4j/message/MessageHeadlessParserTest.java?rev=894750&r1=894749&r2=894750&view=diff ============================================================================== --- james/mime4j/branches/cycleclean/core/src/test/java/org/apache/james/mime4j/message/MessageHeadlessParserTest.java (original) +++ james/mime4j/branches/cycleclean/core/src/test/java/org/apache/james/mime4j/message/MessageHeadlessParserTest.java Thu Dec 31 01:18:12 2009 @@ -19,6 +19,7 @@ package org.apache.james.mime4j.message; +import java.io.BufferedReader; import java.io.ByteArrayInputStream; import junit.framework.TestCase; @@ -29,6 +30,44 @@ public class MessageHeadlessParserTest extends TestCase { + + public void testMalformedHeaderShouldEndHeader() throws Exception { + String headlessContent = "Subject: my subject\r\n" + + "Hi, how are you?\r\n" + + "This is a simple message with no CRLFCELF between headers and body.\r\n" + + "ThisIsNotAnHeader: because this 
should be already in the body\r\n" + + "\r\n" + + "Instead this should be better parsed as a text/plain body\r\n"; + + MimeEntityConfig mimeEntityConfig = new MimeEntityConfig(); + mimeEntityConfig.setMalformedHeaderStartsBody(true); + Message message = new Message(new ByteArrayInputStream(headlessContent + .getBytes("UTF-8")), mimeEntityConfig); + assertEquals("text/plain", message.getMimeType()); + assertEquals(1, message.getHeader().getFields().size()); + BufferedReader reader = new BufferedReader(((TextBody) message.getBody()).getReader()); + String firstLine = reader.readLine(); + assertEquals("Hi, how are you?", firstLine); + } + + public void testSimpleNonMimeTextHeadless() throws Exception { + String headlessContent = "Hi, how are you?\r\n" + + "This is a simple message with no headers. While mime messages should start with\r\n" + + "header: headervalue\r\n" + + "\r\n" + + "Instead this should be better parsed as a text/plain body\r\n"; + + MimeEntityConfig mimeEntityConfig = new MimeEntityConfig(); + mimeEntityConfig.setMalformedHeaderStartsBody(true); + Message message = new Message(new ByteArrayInputStream(headlessContent + .getBytes("UTF-8")), mimeEntityConfig); + assertEquals("text/plain", message.getMimeType()); + assertEquals(0, message.getHeader().getFields().size()); + BufferedReader reader = new BufferedReader(((TextBody) message.getBody()).getReader()); + String firstLine = reader.readLine(); + assertEquals("Hi, how are you?", firstLine); + } + public void testMultipartFormContent() throws Exception { String contentType = "multipart/form-data; boundary=foo"; String headlessContent = "\r\n"