commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sebb <seb...@gmail.com>
Subject Re: svn commit: r1397783 - in /commons/proper/csv/trunk/src: main/java/org/apache/commons/csv/CSVFormat.java main/java/org/apache/commons/csv/Lexer.java test/java/org/apache/commons/csv/CSVFormatTest.java
Date Sat, 13 Oct 2012 12:37:18 GMT
On 13 October 2012 13:14, Gary Gregory <garydgregory@gmail.com> wrote:
> Thank you for the feedback Sebb. I'll do another pass later today.

See also my subsequent posting on the dev list.
I think that might resolve the performance issue without needing to
revert all the changes to CSVFormat.

> Gary
>
> On Oct 13, 2012, at 6:28, sebb <sebbaz@gmail.com> wrote:
>
>> On 13 October 2012 07:27,  <ggregory@apache.org> wrote:
>>> Author: ggregory
>>> Date: Sat Oct 13 06:27:52 2012
>>> New Revision: 1397783
>>>
>>> URL: http://svn.apache.org/viewvc?rev=1397783&view=rev
>>> Log:
>>> Remove DISABLED character hack.
>>
>> -1 (for now)
>>
>> Are you sure this change does not affect performance?
>> The Lexer code now has to do some boxing and unboxing.
>> Unboxing is not expensive, but boxing is potentially so.
>>
>> Also, the code now allows for a null delimiter - is that really sensible?
>>
>> Also CSVFormat.getDelimiter() is inconsistent - it returns char
>> whereas all the others return Character.
>>
>> I think the Lexer should stick to using char, particularly for the
>> delimiter which cannot be null.
>>
>>> Modified:
>>>    commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
>>>    commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
>>>    commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
>>>
>>> Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
>>> URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1397783&r1=1397782&r2=1397783&view=diff
>>> ==============================================================================
>>> --- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
(original)
>>> +++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
Sat Oct 13 06:27:52 2012
>>> @@ -18,6 +18,7 @@
>>> package org.apache.commons.csv;
>>>
>>> import static org.apache.commons.csv.Constants.COMMA;
>>> +import static org.apache.commons.csv.Constants.CR;
>>> import static org.apache.commons.csv.Constants.CRLF;
>>> import static org.apache.commons.csv.Constants.DOUBLE_QUOTE;
>>> import static org.apache.commons.csv.Constants.ESCAPE;
>>> @@ -38,30 +39,19 @@ public class CSVFormat implements Serial
>>>
>>>     private static final long serialVersionUID = 1L;
>>>
>>> -    private final char delimiter;
>>> -    private final char encapsulator;
>>> -    private final char commentStart;
>>> -    private final char escape;
>>> +    private final Character delimiter;
>>> +    private final Character encapsulator;
>>> +    private final Character commentStart;
>>> +    private final Character escape;
>>>     private final boolean ignoreSurroundingSpaces; // Should leading/trailing
spaces be ignored around values?
>>>     private final boolean ignoreEmptyLines;
>>>     private final String lineSeparator; // for outputs
>>>     private final String[] header;
>>>
>>> -    private final boolean isEscaping;
>>> -    private final boolean isCommentingEnabled;
>>> -    private final boolean isEncapsulating;
>>> -
>>> -    /**
>>> -     * Constant char to be used for disabling comments, escapes and encapsulation.
The value -2 is used because it
>>> -     * won't be confused with an EOF signal (-1), and because the Unicode value
{@code FFFE} would be encoded as two chars
>>> -     * (using surrogates) and thus there should never be a collision with a
real text char.
>>> -     */
>>> -    static final char DISABLED = '\ufffe';
>>> -
>>>     /**
>>>      * Starting format with no settings defined; used for creating other formats
from scratch.
>>>      */
>>> -    static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, DISABLED,
DISABLED, false, false, null, null);
>>> +    static final CSVFormat PRISTINE = new CSVFormat(null, null, null, null,
false, false, null, null);
>>>
>>>     /**
>>>      * Standard comma separated format, as for {@link #RFC4180} but allowing
blank lines.
>>> @@ -73,8 +63,8 @@ public class CSVFormat implements Serial
>>>      * </ul>
>>>      */
>>>     public static final CSVFormat DEFAULT =
>>> -            PRISTINE.
>>> -            withDelimiter(COMMA)
>>> +            PRISTINE
>>> +            .withDelimiter(COMMA)
>>>             .withEncapsulator(DOUBLE_QUOTE)
>>>             .withIgnoreEmptyLines(true)
>>>             .withLineSeparator(CRLF);
>>> @@ -89,8 +79,8 @@ public class CSVFormat implements Serial
>>>      * </ul>
>>>      */
>>>     public static final CSVFormat RFC4180 =
>>> -            PRISTINE.
>>> -            withDelimiter(COMMA)
>>> +            PRISTINE
>>> +            .withDelimiter(COMMA)
>>>             .withEncapsulator(DOUBLE_QUOTE)
>>>             .withLineSeparator(CRLF);
>>>
>>> @@ -127,7 +117,7 @@ public class CSVFormat implements Serial
>>>      * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
>>>      *      http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
>>>      */
>>> -    public static final CSVFormat MYSQL =
>>> +    public static final CSVFormat MYSQL =
>>>             PRISTINE
>>>             .withDelimiter(TAB)
>>>             .withEscape(ESCAPE)
>>> @@ -153,7 +143,7 @@ public class CSVFormat implements Serial
>>>      * @param header
>>>      *            the header
>>>      */
>>> -    CSVFormat(final char delimiter, final char encapsulator, final char commentStart,
final char escape, final boolean surroundingSpacesIgnored,
>>> +    CSVFormat(final Character delimiter, final Character encapsulator, final
Character commentStart, final Character escape, final boolean surroundingSpacesIgnored,
>>>             final boolean emptyLinesIgnored, final String lineSeparator, final
String[] header) {
>>>         this.delimiter = delimiter;
>>>         this.encapsulator = encapsulator;
>>> @@ -163,9 +153,6 @@ public class CSVFormat implements Serial
>>>         this.ignoreEmptyLines = emptyLinesIgnored;
>>>         this.lineSeparator = lineSeparator;
>>>         this.header = header;
>>> -        this.isEncapsulating = encapsulator != DISABLED;
>>> -        this.isCommentingEnabled = commentStart != DISABLED;
>>> -        this.isEscaping = escape != DISABLED;
>>>     }
>>>
>>>     /**
>>> @@ -176,8 +163,8 @@ public class CSVFormat implements Serial
>>>      *
>>>      * @return true if <code>c</code> is a line break character
>>>      */
>>> -    private static boolean isLineBreak(final char c) {
>>> -        return c == '\n' || c == '\r';
>>> +    private static boolean isLineBreak(final Character c) {
>>> +        return c != null && (c == LF || c == CR);
>>>     }
>>>
>>>     /**
>>> @@ -199,12 +186,12 @@ public class CSVFormat implements Serial
>>>                     commentStart + "\")");
>>>         }
>>>
>>> -        if (encapsulator != DISABLED && encapsulator == commentStart)
{
>>> +        if (encapsulator != null && encapsulator == commentStart) {
>>>             throw new IllegalArgumentException(
>>>                     "The comment start character and the encapsulator cannot
be the same (\"" + commentStart + "\")");
>>>         }
>>>
>>> -        if (escape != DISABLED && escape == commentStart) {
>>> +        if (escape != null && escape == commentStart) {
>>>             throw new IllegalArgumentException("The comment start and the escape
character cannot be the same (\"" +
>>>                     commentStart + "\")");
>>>         }
>>> @@ -229,6 +216,19 @@ public class CSVFormat implements Serial
>>>      *             thrown if the specified character is a line break
>>>      */
>>>     public CSVFormat withDelimiter(final char delimiter) {
>>> +        return withDelimiter(Character.valueOf(delimiter));
>>> +    }
>>> +
>>> +    /**
>>> +     * Returns a copy of this format using the specified delimiter character.
>>> +     *
>>> +     * @param delimiter
>>> +     *            the delimiter character
>>> +     * @return A copy of this format using the specified delimiter character
>>> +     * @throws IllegalArgumentException
>>> +     *             thrown if the specified character is a line break
>>> +     */
>>> +    public CSVFormat withDelimiter(final Character delimiter) {
>>>         if (isLineBreak(delimiter)) {
>>>             throw new IllegalArgumentException("The delimiter cannot be a line
break");
>>>         }
>>> @@ -241,7 +241,7 @@ public class CSVFormat implements Serial
>>>      *
>>>      * @return the encapsulator character
>>>      */
>>> -    public char getEncapsulator() {
>>> +    public Character getEncapsulator() {
>>>         return encapsulator;
>>>     }
>>>
>>> @@ -255,6 +255,19 @@ public class CSVFormat implements Serial
>>>      *             thrown if the specified character is a line break
>>>      */
>>>     public CSVFormat withEncapsulator(final char encapsulator) {
>>> +        return withEncapsulator(Character.valueOf(encapsulator));
>>> +    }
>>> +
>>> +    /**
>>> +     * Returns a copy of this format using the specified encapsulator character.
>>> +     *
>>> +     * @param encapsulator
>>> +     *            the encapsulator character
>>> +     * @return A copy of this format using the specified encapsulator character
>>> +     * @throws IllegalArgumentException
>>> +     *             thrown if the specified character is a line break
>>> +     */
>>> +    public CSVFormat withEncapsulator(final Character encapsulator) {
>>>         if (isLineBreak(encapsulator)) {
>>>             throw new IllegalArgumentException("The encapsulator cannot be a
line break");
>>>         }
>>> @@ -268,7 +281,7 @@ public class CSVFormat implements Serial
>>>      * @return {@code true} if an encapsulator is defined
>>>      */
>>>     public boolean isEncapsulating() {
>>> -        return isEncapsulating;
>>> +        return encapsulator != null;
>>>     }
>>>
>>>     /**
>>> @@ -276,7 +289,7 @@ public class CSVFormat implements Serial
>>>      *
>>>      * @return the comment start marker.
>>>      */
>>> -    public char getCommentStart() {
>>> +    public Character getCommentStart() {
>>>         return commentStart;
>>>     }
>>>
>>> @@ -292,6 +305,21 @@ public class CSVFormat implements Serial
>>>      *             thrown if the specified character is a line break
>>>      */
>>>     public CSVFormat withCommentStart(final char commentStart) {
>>> +        return withCommentStart(Character.valueOf(commentStart));
>>> +    }
>>> +
>>> +    /**
>>> +     * Returns a copy of this format using the specified character as the comment
start marker.
>>> +     *
>>> +     * Note that the comment introducer character is only recognised at the
start of a line.
>>> +     *
>>> +     * @param commentStart
>>> +     *            the comment start marker
>>> +     * @return A copy of this format using the specified character as the comment
start marker
>>> +     * @throws IllegalArgumentException
>>> +     *             thrown if the specified character is a line break
>>> +     */
>>> +    public CSVFormat withCommentStart(final Character commentStart) {
>>>         if (isLineBreak(commentStart)) {
>>>             throw new IllegalArgumentException("The comment start character cannot
be a line break");
>>>         }
>>> @@ -307,7 +335,7 @@ public class CSVFormat implements Serial
>>>      * @return <tt>true</tt> is comments are supported, <tt>false</tt>
otherwise
>>>      */
>>>     public boolean isCommentingEnabled() {
>>> -        return isCommentingEnabled;
>>> +        return commentStart != null;
>>>     }
>>>
>>>     /**
>>> @@ -315,7 +343,7 @@ public class CSVFormat implements Serial
>>>      *
>>>      * @return the escape character
>>>      */
>>> -    public char getEscape() {
>>> +    public Character getEscape() {
>>>         return escape;
>>>     }
>>>
>>> @@ -329,6 +357,19 @@ public class CSVFormat implements Serial
>>>      *             thrown if the specified character is a line break
>>>      */
>>>     public CSVFormat withEscape(final char escape) {
>>> +        return withEscape(Character.valueOf(escape));
>>> +    }
>>> +
>>> +    /**
>>> +     * Returns a copy of this format using the specified escape character.
>>> +     *
>>> +     * @param escape
>>> +     *            the escape character
>>> +     * @return A copy of this format using the specified escape character
>>> +     * @throws IllegalArgumentException
>>> +     *             thrown if the specified character is a line break
>>> +     */
>>> +    public CSVFormat withEscape(final Character escape) {
>>>         if (isLineBreak(escape)) {
>>>             throw new IllegalArgumentException("The escape character cannot be
a line break");
>>>         }
>>> @@ -342,7 +383,7 @@ public class CSVFormat implements Serial
>>>      * @return {@code true} if escapes are processed
>>>      */
>>>     public boolean isEscaping() {
>>> -        return isEscaping;
>>> +        return escape != null;
>>>     }
>>>
>>>     /**
>>>
>>> Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
>>> URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java?rev=1397783&r1=1397782&r2=1397783&view=diff
>>> ==============================================================================
>>> --- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
(original)
>>> +++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
Sat Oct 13 06:27:52 2012
>>> @@ -32,14 +32,10 @@ import java.io.IOException;
>>>  */
>>> abstract class Lexer {
>>>
>>> -    private final boolean isEncapsulating;
>>> -    private final boolean isEscaping;
>>> -    private final boolean isCommentEnabled;
>>> -
>>> -    private final char delimiter;
>>> -    private final char escape;
>>> -    private final char encapsulator;
>>> -    private final char commmentStart;
>>> +    private final Character delimiter;
>>> +    private final Character escape;
>>> +    private final Character encapsulator;
>>> +    private final Character commmentStart;
>>>
>>>     final boolean surroundingSpacesIgnored;
>>>     final boolean emptyLinesIgnored;
>>> @@ -52,9 +48,6 @@ abstract class Lexer {
>>>     Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
>>>         this.format = format;
>>>         this.in = in;
>>> -        this.isEncapsulating = format.isEncapsulating();
>>> -        this.isEscaping = format.isEscaping();
>>> -        this.isCommentEnabled = format.isCommentingEnabled();
>>>         this.delimiter = format.getDelimiter();
>>>         this.escape = format.getEscape();
>>>         this.encapsulator = format.getEncapsulator();
>>> @@ -144,14 +137,14 @@ abstract class Lexer {
>>>     }
>>>
>>>     boolean isEscape(final int c) {
>>> -        return isEscaping && c == escape;
>>> +        return escape != null && c == escape;
>>>     }
>>>
>>>     boolean isEncapsulator(final int c) {
>>> -        return isEncapsulating && c == encapsulator;
>>> +        return encapsulator != null && c == encapsulator;
>>>     }
>>>
>>>     boolean isCommentStart(final int c) {
>>> -        return isCommentEnabled && c == commmentStart;
>>> +        return commmentStart != null && c == commmentStart;
>>>     }
>>> }
>>>
>>> Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
>>> URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1397783&r1=1397782&r2=1397783&view=diff
>>> ==============================================================================
>>> --- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
(original)
>>> +++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
Sat Oct 13 06:27:52 2012
>>> @@ -46,9 +46,9 @@ public class CSVFormatTest {
>>>         format.withIgnoreEmptyLines(false);
>>>
>>>         assertEquals('!', format.getDelimiter());
>>> -        assertEquals('!', format.getEncapsulator());
>>> -        assertEquals('!', format.getCommentStart());
>>> -        assertEquals('!', format.getEscape());
>>> +        assertEquals('!', format.getEncapsulator().charValue());
>>> +        assertEquals('!', format.getCommentStart().charValue());
>>> +        assertEquals('!', format.getEscape().charValue());
>>>         assertEquals(CRLF, format.getLineSeparator());
>>>
>>>         assertTrue(format.getIgnoreSurroundingSpaces());
>>> @@ -60,10 +60,10 @@ public class CSVFormatTest {
>>>         final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true,
CRLF, null);
>>>
>>>         assertEquals('?', format.withDelimiter('?').getDelimiter());
>>> -        assertEquals('?', format.withEncapsulator('?').getEncapsulator());
>>> -        assertEquals('?', format.withCommentStart('?').getCommentStart());
>>> +        assertEquals('?', format.withEncapsulator('?').getEncapsulator().charValue());
>>> +        assertEquals('?', format.withCommentStart('?').getCommentStart().charValue());
>>>         assertEquals("?", format.withLineSeparator("?").getLineSeparator());
>>> -        assertEquals('?', format.withEscape('?').getEscape());
>>> +        assertEquals('?', format.withEscape('?').getEscape().charValue());
>>>
>>>         assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces());
>>>         assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines());
>>> @@ -131,7 +131,7 @@ public class CSVFormatTest {
>>>             // expected
>>>         }
>>>
>>> -        format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
>>> +        format.withEncapsulator(null).withCommentStart(null).validate();
>>>
>>>         try {
>>>             format.withEscape('!').withCommentStart('!').validate();
>>> @@ -140,7 +140,7 @@ public class CSVFormatTest {
>>>             // expected
>>>         }
>>>
>>> -        format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
>>> +        format.withEscape(null).withCommentStart(null).validate();
>>>
>>>
>>>         try {
>>>
>>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
>> For additional commands, e-mail: dev-help@commons.apache.org
>>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
> For additional commands, e-mail: dev-help@commons.apache.org
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org


Mime
View raw message