commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1301928 - in /commons/proper/csv/trunk/src: main/java/org/apache/commons/csv/ test/java/org/apache/commons/csv/
Date Sat, 17 Mar 2012 12:29:15 GMT
Author: sebb
Date: Sat Mar 17 12:29:15 2012
New Revision: 1301928

URL: http://svn.apache.org/viewvc?rev=1301928&view=rev
Log:
CSV-67 UnicodeUnescapeReader should not be applied before parsing

Removed:
    commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/UnicodeUnescapeReader.java
Modified:
    commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
    commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
    commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
    commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java

Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java (original)
+++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java Sat Mar 17 12:29:15 2012
@@ -38,7 +38,6 @@ public class CSVFormat implements Serial
     private final char escape;
     private final boolean leadingSpacesIgnored;
     private final boolean trailingSpacesIgnored;
-    private final boolean unicodeEscapesInterpreted;
     private final boolean emptyLinesIgnored;
     private final String lineSeparator; // for outputs
     private final String[] header;
@@ -53,7 +52,7 @@ public class CSVFormat implements Serial
     static final char DISABLED = '\ufffe';
 
     /** Standard comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. */
-    public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, false, true, CRLF, null);
+    public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, true, CRLF, null);
 
     /**
      * Excel file format (using a comma as the value delimiter).
@@ -66,10 +65,10 @@ public class CSVFormat implements Serial
      * 
      * <pre>CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');</pre>
      */
-    public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED, DISABLED, false, false, false, false, CRLF, null);
+    public static final CSVFormat EXCEL = new CSVFormat(',', '"', DISABLED, DISABLED, false, false, false, CRLF, null);
 
     /** Tab-delimited format, with quote; leading and trailing spaces ignored. */
-    public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED, DISABLED, true, true, false, true, CRLF, null);
+    public static final CSVFormat TDF = new CSVFormat('\t', '"', DISABLED, DISABLED, true, true, true, CRLF, null);
 
     /**
      * Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and
@@ -79,7 +78,7 @@ public class CSVFormat implements Serial
      * 
      * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
      */
-    public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED, DISABLED, '\\', false, false, false, false, "\n", null);
+    public static final CSVFormat MYSQL = new CSVFormat('\t', DISABLED, DISABLED, '\\', false, false, false, "\n", null);
 
 
     /**
@@ -91,7 +90,6 @@ public class CSVFormat implements Serial
      * @param escape                    the char used to escape special characters in values
      * @param leadingSpacesIgnored      <tt>true</tt> when leading whitespaces should be ignored
      * @param trailingSpacesIgnored     <tt>true</tt> when trailing whitespaces should be ignored
-     * @param unicodeEscapesInterpreted <tt>true</tt> when unicode escapes should be interpreted
      * @param emptyLinesIgnored         <tt>true</tt> when the parser should skip emtpy lines
      * @param lineSeparator             the line separator to use for output
      * @param header                    the header
@@ -103,7 +101,6 @@ public class CSVFormat implements Serial
             char escape,
             boolean leadingSpacesIgnored,
             boolean trailingSpacesIgnored,
-            boolean unicodeEscapesInterpreted,
             boolean emptyLinesIgnored,
             String lineSeparator,
             String[] header) {
@@ -113,7 +110,6 @@ public class CSVFormat implements Serial
         this.escape = escape;
         this.leadingSpacesIgnored = leadingSpacesIgnored;
         this.trailingSpacesIgnored = trailingSpacesIgnored;
-        this.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
         this.emptyLinesIgnored = emptyLinesIgnored;
         this.lineSeparator = lineSeparator;
         this.header = header;
@@ -176,7 +172,7 @@ public class CSVFormat implements Serial
             throw new IllegalArgumentException("The delimiter cannot be a line break");
         }
 
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     /**
@@ -200,7 +196,7 @@ public class CSVFormat implements Serial
             throw new IllegalArgumentException("The encapsulator cannot be a line break");
         }
         
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     boolean isEncapsulating() {
@@ -228,7 +224,7 @@ public class CSVFormat implements Serial
             throw new IllegalArgumentException("The comment start character cannot be a line break");
         }
         
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     /**
@@ -261,7 +257,7 @@ public class CSVFormat implements Serial
             throw new IllegalArgumentException("The escape character cannot be a line break");
         }
         
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     boolean isEscaping() {
@@ -285,7 +281,7 @@ public class CSVFormat implements Serial
      * @return A copy of this format with the specified left trimming behavior.
      */
     public CSVFormat withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     /**
@@ -305,7 +301,7 @@ public class CSVFormat implements Serial
      * @return A copy of this format with the specified right trimming behavior.
      */
     public CSVFormat withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     /**
@@ -316,28 +312,7 @@ public class CSVFormat implements Serial
      * @return A copy of this format with the specified trimming behavior.
      */
     public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
-    }
-
-    /**
-     * Tells if unicode escape sequences (e.g. {@literal \u1234}) are turned into their corresponding character
-     * when parsing input.
-     * 
-     * @return <tt>true</tt> if unicode escape sequences are interpreted, <tt>false</tt> if they are left as is.
-     */
-    public boolean isUnicodeEscapesInterpreted() {
-        return unicodeEscapesInterpreted;
-    }
-
-    /**
-     * Returns a copy of this format with the specified unicode escaping behavior.
-     *
-     * @param unicodeEscapesInterpreted the escaping behavior, <tt>true</tt> to interpret unicode escape sequences,
-     *                                  <tt>false</tt> to leave the escape sequences as is.
-     * @return A copy of this format with the specified unicode escaping behavior.
-     */
-    public CSVFormat withUnicodeEscapesInterpreted(boolean unicodeEscapesInterpreted) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     /**
@@ -357,7 +332,7 @@ public class CSVFormat implements Serial
      * @return A copy of this format  with the specified empty line skipping behavior.
      */
     public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     /**
@@ -377,7 +352,7 @@ public class CSVFormat implements Serial
      * @return A copy of this format using the specified output line separator
      */
     public CSVFormat withLineSeparator(String lineSeparator) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     String[] getHeader() {
@@ -399,7 +374,7 @@ public class CSVFormat implements Serial
      * @return A copy of this format using the specified header
      */
     public CSVFormat withHeader(String... header) {
-        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, unicodeEscapesInterpreted, emptyLinesIgnored, lineSeparator, header);
+        return new CSVFormat(delimiter, encapsulator, commentStart, escape, leadingSpacesIgnored, trailingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
     }
 
     /**

Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java (original)
+++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java Sat Mar 17 12:29:15 2012
@@ -92,10 +92,6 @@ public class CSVParser implements Iterab
     public CSVParser(Reader input, CSVFormat format) throws IOException {
         format.validate();
         
-        if (format.isUnicodeEscapesInterpreted()) {
-            input = new UnicodeUnescapeReader(input);
-        }
-        
         this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));
         
         initializeHeader(format);

Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java (original)
+++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java Sat Mar 17 12:29:15 2012
@@ -30,7 +30,7 @@ public class CSVFormatTest {
 
     @Test
     public void testImmutalibity() {
-        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, true, "\r\n", null);
+        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
         
         format.withDelimiter('?');
         format.withEncapsulator('?');
@@ -40,7 +40,6 @@ public class CSVFormatTest {
         format.withLeadingSpacesIgnored(false);
         format.withTrailingSpacesIgnored(false);
         format.withEmptyLinesIgnored(false);
-        format.withUnicodeEscapesInterpreted(false);
         
         assertEquals('!', format.getDelimiter());
         assertEquals('!', format.getEncapsulator());
@@ -51,12 +50,11 @@ public class CSVFormatTest {
         assertTrue(format.isLeadingSpacesIgnored());
         assertTrue(format.isTrailingSpacesIgnored());
         assertTrue(format.isEmptyLinesIgnored());
-        assertTrue(format.isUnicodeEscapesInterpreted());
     }
 
     @Test
     public void testMutators() {
-        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, true, "\r\n", null);
+        CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, true, "\r\n", null);
         
         assertEquals('?', format.withDelimiter('?').getDelimiter());
         assertEquals('?', format.withEncapsulator('?').getEncapsulator());
@@ -69,7 +67,6 @@ public class CSVFormatTest {
         assertFalse(format.withSurroundingSpacesIgnored(false).isLeadingSpacesIgnored());
         assertFalse(format.withSurroundingSpacesIgnored(false).isTrailingSpacesIgnored());
         assertFalse(format.withEmptyLinesIgnored(false).isEmptyLinesIgnored());
-        assertFalse(format.withUnicodeEscapesInterpreted(false).isUnicodeEscapesInterpreted());
     }
 
     @Test
@@ -172,7 +169,6 @@ public class CSVFormatTest {
         assertEquals("comment start", CSVFormat.DEFAULT.getCommentStart(), format.getCommentStart());
         assertEquals("line separator", CSVFormat.DEFAULT.getLineSeparator(), format.getLineSeparator());
         assertEquals("escape", CSVFormat.DEFAULT.getEscape(), format.getEscape());
-        assertEquals("unicode escape", CSVFormat.DEFAULT.isUnicodeEscapesInterpreted(), format.isUnicodeEscapesInterpreted());
         assertEquals("trim left", CSVFormat.DEFAULT.isLeadingSpacesIgnored(), format.isLeadingSpacesIgnored());
         assertEquals("trim right", CSVFormat.DEFAULT.isTrailingSpacesIgnored(), format.isTrailingSpacesIgnored());
         assertEquals("empty lines", CSVFormat.DEFAULT.isEmptyLinesIgnored(), format.isEmptyLinesIgnored());

Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java?rev=1301928&r1=1301927&r2=1301928&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java (original)
+++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java Sat Mar 17 12:29:15 2012
@@ -283,7 +283,7 @@ public class CSVParserTest {
         };
 
 
-        CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, true, "\r\n", null);
+        CSVFormat format = new CSVFormat(',', '\'', CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
 
         CSVParser parser = new CSVParser(code, format);
         List<CSVRecord> records = parser.getRecords();
@@ -312,7 +312,7 @@ public class CSVParserTest {
         };
 
 
-        CSVFormat format = new CSVFormat(',',  CSVFormat.DISABLED,  CSVFormat.DISABLED, '/', false, false, true, true, "\r\n", null);
+        CSVFormat format = new CSVFormat(',',  CSVFormat.DISABLED,  CSVFormat.DISABLED, '/', false, false, true, "\r\n", null);
 
         CSVParser parser = new CSVParser(code, format);
         List<CSVRecord> records = parser.getRecords();
@@ -357,30 +357,6 @@ public class CSVParserTest {
     }
 
     @Test
-    public void testUnicodeEscape() throws Exception {
-        String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
-        CSVParser parser = new CSVParser(code, CSVFormat.DEFAULT.withUnicodeEscapesInterpreted(true));
-        final Iterator<CSVRecord> iterator = parser.iterator();
-        CSVRecord record = iterator.next();
-        assertEquals(2, record.size());
-        assertEquals("abc", record.get(0));
-        assertEquals("public", record.get(1));
-        assertFalse("Should not have any more records", iterator.hasNext());
-    }
-
-    @Test
-    public void testUnicodeEscapeMySQL() throws Exception {
-        String code = "abc\t\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
-        CSVParser parser = new CSVParser(code, CSVFormat.MYSQL.withUnicodeEscapesInterpreted(true));
-        final Iterator<CSVRecord> iterator = parser.iterator();
-        CSVRecord record = iterator.next();
-        assertEquals(2, record.size());
-        assertEquals("abc", record.get(0));
-        assertEquals("public", record.get(1));
-        assertFalse("Should not have any more records", iterator.hasNext());
-    }
-
-    @Test
     public void testCarriageReturnLineFeedEndings() throws IOException {
         String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
         CSVParser parser = new CSVParser(new StringReader(code));



Mime
View raw message