commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Gary Gregory <garydgreg...@gmail.com>
Subject Re: svn commit: r1397783 - in /commons/proper/csv/trunk/src: main/java/org/apache/commons/csv/CSVFormat.java main/java/org/apache/commons/csv/Lexer.java test/java/org/apache/commons/csv/CSVFormatTest.java
Date Sat, 13 Oct 2012 12:48:58 GMT
On Oct 13, 2012, at 8:37, sebb <sebbaz@gmail.com> wrote:

> On 13 October 2012 13:14, Gary Gregory <garydgregory@gmail.com> wrote:
>> Thank you for the feedback Sebb. I'll do another pass later today.
>
> See also my subsequent posting on the dev list.
> I think that might resolve the performance issue without needing to
> revert all the changes to CSVFormat.

Roger that. It'll be a couple of hours before I get to it.

Thank you again.

Gary

>
>> Gary
>>
>> On Oct 13, 2012, at 6:28, sebb <sebbaz@gmail.com> wrote:
>>
>>> On 13 October 2012 07:27,  <ggregory@apache.org> wrote:
>>>> Author: ggregory
>>>> Date: Sat Oct 13 06:27:52 2012
>>>> New Revision: 1397783
>>>>
>>>> URL: http://svn.apache.org/viewvc?rev=1397783&view=rev
>>>> Log:
>>>> Remove DISABLED character hack.
>>>
>>> -1 (for now)
>>>
>>> Are you sure this change does not affect performance?
>>> The Lexer code now has to do some boxing and unboxing.
>>> Unboxing is not expensive, but boxing is potentially so.
>>>
>>> Also, the code now allows for a null delimiter - is that really sensible?
>>>
>>> Also CSVFormat.getDelimiter() is inconsistent - it returns char
>>> whereas all the others return Character.
>>>
>>> I think the Lexer should stick to using char, particularly for the
>>> delimiter which cannot be null.
>>>
>>>> Modified:
>>>>   commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
>>>>   commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
>>>>   commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
>>>>
>>>> Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
>>>> URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1397783&r1=1397782&r2=1397783&view=diff
>>>> ==============================================================================
>>>> --- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
(original)
>>>> +++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
Sat Oct 13 06:27:52 2012
>>>> @@ -18,6 +18,7 @@
>>>> package org.apache.commons.csv;
>>>>
>>>> import static org.apache.commons.csv.Constants.COMMA;
>>>> +import static org.apache.commons.csv.Constants.CR;
>>>> import static org.apache.commons.csv.Constants.CRLF;
>>>> import static org.apache.commons.csv.Constants.DOUBLE_QUOTE;
>>>> import static org.apache.commons.csv.Constants.ESCAPE;
>>>> @@ -38,30 +39,19 @@ public class CSVFormat implements Serial
>>>>
>>>>    private static final long serialVersionUID = 1L;
>>>>
>>>> -    private final char delimiter;
>>>> -    private final char encapsulator;
>>>> -    private final char commentStart;
>>>> -    private final char escape;
>>>> +    private final Character delimiter;
>>>> +    private final Character encapsulator;
>>>> +    private final Character commentStart;
>>>> +    private final Character escape;
>>>>    private final boolean ignoreSurroundingSpaces; // Should leading/trailing
spaces be ignored around values?
>>>>    private final boolean ignoreEmptyLines;
>>>>    private final String lineSeparator; // for outputs
>>>>    private final String[] header;
>>>>
>>>> -    private final boolean isEscaping;
>>>> -    private final boolean isCommentingEnabled;
>>>> -    private final boolean isEncapsulating;
>>>> -
>>>> -    /**
>>>> -     * Constant char to be used for disabling comments, escapes and encapsulation.
The value -2 is used because it
>>>> -     * won't be confused with an EOF signal (-1), and because the Unicode
value {@code FFFE} would be encoded as two chars
>>>> -     * (using surrogates) and thus there should never be a collision with
a real text char.
>>>> -     */
>>>> -    static final char DISABLED = '\ufffe';
>>>> -
>>>>    /**
>>>>     * Starting format with no settings defined; used for creating other formats
from scratch.
>>>>     */
>>>> -    static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED,
DISABLED, DISABLED, false, false, null, null);
>>>> +    static final CSVFormat PRISTINE = new CSVFormat(null, null, null, null,
false, false, null, null);
>>>>
>>>>    /**
>>>>     * Standard comma separated format, as for {@link #RFC4180} but allowing
blank lines.
>>>> @@ -73,8 +63,8 @@ public class CSVFormat implements Serial
>>>>     * </ul>
>>>>     */
>>>>    public static final CSVFormat DEFAULT =
>>>> -            PRISTINE.
>>>> -            withDelimiter(COMMA)
>>>> +            PRISTINE
>>>> +            .withDelimiter(COMMA)
>>>>            .withEncapsulator(DOUBLE_QUOTE)
>>>>            .withIgnoreEmptyLines(true)
>>>>            .withLineSeparator(CRLF);
>>>> @@ -89,8 +79,8 @@ public class CSVFormat implements Serial
>>>>     * </ul>
>>>>     */
>>>>    public static final CSVFormat RFC4180 =
>>>> -            PRISTINE.
>>>> -            withDelimiter(COMMA)
>>>> +            PRISTINE
>>>> +            .withDelimiter(COMMA)
>>>>            .withEncapsulator(DOUBLE_QUOTE)
>>>>            .withLineSeparator(CRLF);
>>>>
>>>> @@ -127,7 +117,7 @@ public class CSVFormat implements Serial
>>>>     * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
>>>>     *      http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
>>>>     */
>>>> -    public static final CSVFormat MYSQL =
>>>> +    public static final CSVFormat MYSQL =
>>>>            PRISTINE
>>>>            .withDelimiter(TAB)
>>>>            .withEscape(ESCAPE)
>>>> @@ -153,7 +143,7 @@ public class CSVFormat implements Serial
>>>>     * @param header
>>>>     *            the header
>>>>     */
>>>> -    CSVFormat(final char delimiter, final char encapsulator, final char
commentStart, final char escape, final boolean surroundingSpacesIgnored,
>>>> +    CSVFormat(final Character delimiter, final Character encapsulator, final
Character commentStart, final Character escape, final boolean surroundingSpacesIgnored,
>>>>            final boolean emptyLinesIgnored, final String lineSeparator, final
String[] header) {
>>>>        this.delimiter = delimiter;
>>>>        this.encapsulator = encapsulator;
>>>> @@ -163,9 +153,6 @@ public class CSVFormat implements Serial
>>>>        this.ignoreEmptyLines = emptyLinesIgnored;
>>>>        this.lineSeparator = lineSeparator;
>>>>        this.header = header;
>>>> -        this.isEncapsulating = encapsulator != DISABLED;
>>>> -        this.isCommentingEnabled = commentStart != DISABLED;
>>>> -        this.isEscaping = escape != DISABLED;
>>>>    }
>>>>
>>>>    /**
>>>> @@ -176,8 +163,8 @@ public class CSVFormat implements Serial
>>>>     *
>>>>     * @return true if <code>c</code> is a line break character
>>>>     */
>>>> -    private static boolean isLineBreak(final char c) {
>>>> -        return c == '\n' || c == '\r';
>>>> +    private static boolean isLineBreak(final Character c) {
>>>> +        return c != null && (c == LF || c == CR);
>>>>    }
>>>>
>>>>    /**
>>>> @@ -199,12 +186,12 @@ public class CSVFormat implements Serial
>>>>                    commentStart + "\")");
>>>>        }
>>>>
>>>> -        if (encapsulator != DISABLED && encapsulator == commentStart)
{
>>>> +        if (encapsulator != null && encapsulator == commentStart)
{
>>>>            throw new IllegalArgumentException(
>>>>                    "The comment start character and the encapsulator cannot
be the same (\"" + commentStart + "\")");
>>>>        }
>>>>
>>>> -        if (escape != DISABLED && escape == commentStart) {
>>>> +        if (escape != null && escape == commentStart) {
>>>>            throw new IllegalArgumentException("The comment start and the
escape character cannot be the same (\"" +
>>>>                    commentStart + "\")");
>>>>        }
>>>> @@ -229,6 +216,19 @@ public class CSVFormat implements Serial
>>>>     *             thrown if the specified character is a line break
>>>>     */
>>>>    public CSVFormat withDelimiter(final char delimiter) {
>>>> +        return withDelimiter(Character.valueOf(delimiter));
>>>> +    }
>>>> +
>>>> +    /**
>>>> +     * Returns a copy of this format using the specified delimiter character.
>>>> +     *
>>>> +     * @param delimiter
>>>> +     *            the delimiter character
>>>> +     * @return A copy of this format using the specified delimiter character
>>>> +     * @throws IllegalArgumentException
>>>> +     *             thrown if the specified character is a line break
>>>> +     */
>>>> +    public CSVFormat withDelimiter(final Character delimiter) {
>>>>        if (isLineBreak(delimiter)) {
>>>>            throw new IllegalArgumentException("The delimiter cannot be a
line break");
>>>>        }
>>>> @@ -241,7 +241,7 @@ public class CSVFormat implements Serial
>>>>     *
>>>>     * @return the encapsulator character
>>>>     */
>>>> -    public char getEncapsulator() {
>>>> +    public Character getEncapsulator() {
>>>>        return encapsulator;
>>>>    }
>>>>
>>>> @@ -255,6 +255,19 @@ public class CSVFormat implements Serial
>>>>     *             thrown if the specified character is a line break
>>>>     */
>>>>    public CSVFormat withEncapsulator(final char encapsulator) {
>>>> +        return withEncapsulator(Character.valueOf(encapsulator));
>>>> +    }
>>>> +
>>>> +    /**
>>>> +     * Returns a copy of this format using the specified encapsulator character.
>>>> +     *
>>>> +     * @param encapsulator
>>>> +     *            the encapsulator character
>>>> +     * @return A copy of this format using the specified encapsulator character
>>>> +     * @throws IllegalArgumentException
>>>> +     *             thrown if the specified character is a line break
>>>> +     */
>>>> +    public CSVFormat withEncapsulator(final Character encapsulator) {
>>>>        if (isLineBreak(encapsulator)) {
>>>>            throw new IllegalArgumentException("The encapsulator cannot be
a line break");
>>>>        }
>>>> @@ -268,7 +281,7 @@ public class CSVFormat implements Serial
>>>>     * @return {@code true} if an encapsulator is defined
>>>>     */
>>>>    public boolean isEncapsulating() {
>>>> -        return isEncapsulating;
>>>> +        return encapsulator != null;
>>>>    }
>>>>
>>>>    /**
>>>> @@ -276,7 +289,7 @@ public class CSVFormat implements Serial
>>>>     *
>>>>     * @return the comment start marker.
>>>>     */
>>>> -    public char getCommentStart() {
>>>> +    public Character getCommentStart() {
>>>>        return commentStart;
>>>>    }
>>>>
>>>> @@ -292,6 +305,21 @@ public class CSVFormat implements Serial
>>>>     *             thrown if the specified character is a line break
>>>>     */
>>>>    public CSVFormat withCommentStart(final char commentStart) {
>>>> +        return withCommentStart(Character.valueOf(commentStart));
>>>> +    }
>>>> +
>>>> +    /**
>>>> +     * Returns a copy of this format using the specified character as the
comment start marker.
>>>> +     *
>>>> +     * Note that the comment introducer character is only recognised at
the start of a line.
>>>> +     *
>>>> +     * @param commentStart
>>>> +     *            the comment start marker
>>>> +     * @return A copy of this format using the specified character as the
comment start marker
>>>> +     * @throws IllegalArgumentException
>>>> +     *             thrown if the specified character is a line break
>>>> +     */
>>>> +    public CSVFormat withCommentStart(final Character commentStart) {
>>>>        if (isLineBreak(commentStart)) {
>>>>            throw new IllegalArgumentException("The comment start character
cannot be a line break");
>>>>        }
>>>> @@ -307,7 +335,7 @@ public class CSVFormat implements Serial
>>>>     * @return <tt>true</tt> is comments are supported, <tt>false</tt>
otherwise
>>>>     */
>>>>    public boolean isCommentingEnabled() {
>>>> -        return isCommentingEnabled;
>>>> +        return commentStart != null;
>>>>    }
>>>>
>>>>    /**
>>>> @@ -315,7 +343,7 @@ public class CSVFormat implements Serial
>>>>     *
>>>>     * @return the escape character
>>>>     */
>>>> -    public char getEscape() {
>>>> +    public Character getEscape() {
>>>>        return escape;
>>>>    }
>>>>
>>>> @@ -329,6 +357,19 @@ public class CSVFormat implements Serial
>>>>     *             thrown if the specified character is a line break
>>>>     */
>>>>    public CSVFormat withEscape(final char escape) {
>>>> +        return withEscape(Character.valueOf(escape));
>>>> +    }
>>>> +
>>>> +    /**
>>>> +     * Returns a copy of this format using the specified escape character.
>>>> +     *
>>>> +     * @param escape
>>>> +     *            the escape character
>>>> +     * @return A copy of this format using the specified escape character
>>>> +     * @throws IllegalArgumentException
>>>> +     *             thrown if the specified character is a line break
>>>> +     */
>>>> +    public CSVFormat withEscape(final Character escape) {
>>>>        if (isLineBreak(escape)) {
>>>>            throw new IllegalArgumentException("The escape character cannot
be a line break");
>>>>        }
>>>> @@ -342,7 +383,7 @@ public class CSVFormat implements Serial
>>>>     * @return {@code true} if escapes are processed
>>>>     */
>>>>    public boolean isEscaping() {
>>>> -        return isEscaping;
>>>> +        return escape != null;
>>>>    }
>>>>
>>>>    /**
>>>>
>>>> Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
>>>> URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java?rev=1397783&r1=1397782&r2=1397783&view=diff
>>>> ==============================================================================
>>>> --- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
(original)
>>>> +++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java
Sat Oct 13 06:27:52 2012
>>>> @@ -32,14 +32,10 @@ import java.io.IOException;
>>>> */
>>>> abstract class Lexer {
>>>>
>>>> -    private final boolean isEncapsulating;
>>>> -    private final boolean isEscaping;
>>>> -    private final boolean isCommentEnabled;
>>>> -
>>>> -    private final char delimiter;
>>>> -    private final char escape;
>>>> -    private final char encapsulator;
>>>> -    private final char commmentStart;
>>>> +    private final Character delimiter;
>>>> +    private final Character escape;
>>>> +    private final Character encapsulator;
>>>> +    private final Character commmentStart;
>>>>
>>>>    final boolean surroundingSpacesIgnored;
>>>>    final boolean emptyLinesIgnored;
>>>> @@ -52,9 +48,6 @@ abstract class Lexer {
>>>>    Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
>>>>        this.format = format;
>>>>        this.in = in;
>>>> -        this.isEncapsulating = format.isEncapsulating();
>>>> -        this.isEscaping = format.isEscaping();
>>>> -        this.isCommentEnabled = format.isCommentingEnabled();
>>>>        this.delimiter = format.getDelimiter();
>>>>        this.escape = format.getEscape();
>>>>        this.encapsulator = format.getEncapsulator();
>>>> @@ -144,14 +137,14 @@ abstract class Lexer {
>>>>    }
>>>>
>>>>    boolean isEscape(final int c) {
>>>> -        return isEscaping && c == escape;
>>>> +        return escape != null && c == escape;
>>>>    }
>>>>
>>>>    boolean isEncapsulator(final int c) {
>>>> -        return isEncapsulating && c == encapsulator;
>>>> +        return encapsulator != null && c == encapsulator;
>>>>    }
>>>>
>>>>    boolean isCommentStart(final int c) {
>>>> -        return isCommentEnabled && c == commmentStart;
>>>> +        return commmentStart != null && c == commmentStart;
>>>>    }
>>>> }
>>>>
>>>> Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
>>>> URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1397783&r1=1397782&r2=1397783&view=diff
>>>> ==============================================================================
>>>> --- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
(original)
>>>> +++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
Sat Oct 13 06:27:52 2012
>>>> @@ -46,9 +46,9 @@ public class CSVFormatTest {
>>>>        format.withIgnoreEmptyLines(false);
>>>>
>>>>        assertEquals('!', format.getDelimiter());
>>>> -        assertEquals('!', format.getEncapsulator());
>>>> -        assertEquals('!', format.getCommentStart());
>>>> -        assertEquals('!', format.getEscape());
>>>> +        assertEquals('!', format.getEncapsulator().charValue());
>>>> +        assertEquals('!', format.getCommentStart().charValue());
>>>> +        assertEquals('!', format.getEscape().charValue());
>>>>        assertEquals(CRLF, format.getLineSeparator());
>>>>
>>>>        assertTrue(format.getIgnoreSurroundingSpaces());
>>>> @@ -60,10 +60,10 @@ public class CSVFormatTest {
>>>>        final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true,
CRLF, null);
>>>>
>>>>        assertEquals('?', format.withDelimiter('?').getDelimiter());
>>>> -        assertEquals('?', format.withEncapsulator('?').getEncapsulator());
>>>> -        assertEquals('?', format.withCommentStart('?').getCommentStart());
>>>> +        assertEquals('?', format.withEncapsulator('?').getEncapsulator().charValue());
>>>> +        assertEquals('?', format.withCommentStart('?').getCommentStart().charValue());
>>>>        assertEquals("?", format.withLineSeparator("?").getLineSeparator());
>>>> -        assertEquals('?', format.withEscape('?').getEscape());
>>>> +        assertEquals('?', format.withEscape('?').getEscape().charValue());
>>>>
>>>>        assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces());
>>>>        assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines());
>>>> @@ -131,7 +131,7 @@ public class CSVFormatTest {
>>>>            // expected
>>>>        }
>>>>
>>>> -        format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
>>>> +        format.withEncapsulator(null).withCommentStart(null).validate();
>>>>
>>>>        try {
>>>>            format.withEscape('!').withCommentStart('!').validate();
>>>> @@ -140,7 +140,7 @@ public class CSVFormatTest {
>>>>            // expected
>>>>        }
>>>>
>>>> -        format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
>>>> +        format.withEscape(null).withCommentStart(null).validate();
>>>>
>>>>
>>>>        try {
>>>>
>>>>
>>>
>>> ---------------------------------------------------------------------
>>> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
>>> For additional commands, e-mail: dev-help@commons.apache.org
>>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
>> For additional commands, e-mail: dev-help@commons.apache.org
>>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
> For additional commands, e-mail: dev-help@commons.apache.org
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org


Mime
View raw message