commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ggreg...@apache.org
Subject svn commit: r1635052 - in /commons/proper/csv/trunk/src: changes/ main/java/org/apache/commons/csv/ test/java/org/apache/commons/csv/
Date Wed, 29 Oct 2014 05:44:40 GMT
Author: ggregory
Date: Wed Oct 29 05:44:40 2014
New Revision: 1635052

URL: http://svn.apache.org/r1635052
Log:
[CSV-131] Save positions of records to enable random access. The floor is open for code review
and further discussion based on the comments in the Jira.

Modified:
    commons/proper/csv/trunk/src/changes/changes.xml
    commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
    commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java
    commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
    commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java

Modified: commons/proper/csv/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/changes/changes.xml?rev=1635052&r1=1635051&r2=1635052&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/changes/changes.xml (original)
+++ commons/proper/csv/trunk/src/changes/changes.xml Wed Oct 29 05:44:40 2014
@@ -39,6 +39,7 @@
   </properties>
   <body>
     <release version="1.1" date="2014-mm-dd" description="Feature and bug fix release">
+      <action issue="CSV-131" type="add" dev="ggregory" due-to="Holger Stratmann">Save
positions of records to enable random access</action>
       <action issue="CSV-130" type="fix" dev="ggregory" due-to="Sergei Lebedev">CSVFormat#withHeader
doesn't work well with #printComment, add withHeaderComments(String...)</action>
       <action issue="CSV-128" type="fix" dev="ggregory">CSVFormat.EXCEL should ignore
empty header names</action>
       <action issue="CSV-129" type="add" dev="ggregory">Add CSVFormat#with 0-arg methods
matching boolean arg methods</action>

Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java?rev=1635052&r1=1635051&r2=1635052&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java (original)
+++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVParser.java Wed Oct 29
05:44:40 2014
@@ -219,6 +219,12 @@ public final class CSVParser implements 
     private final List<String> record = new ArrayList<String>();
 
     private long recordNumber;
+    
+    /**
+     * Lexer offset if the parser does not start parsing at the beginning of the source.
Usually used in combination
+     * with {@link #setNextRecordNumber(long)}
+     */
+    private long characterOffset;
 
     private final Token reusableToken = new Token();
 
@@ -296,6 +302,43 @@ public final class CSVParser implements 
     }
 
     /**
+     * Sets the record number to be assigned to the next record read.
+     * <p>
+     * Use this if the reader is not positioned at the first record when you create the parser.
For example, the first
+     * record read might be the 51st record in the source file.
+     * </p>
+     * <p>
+     * If you want the records to also have the correct character position referring to the
underlying source, call
+     * {@link #setNextCharacterPosition(long)}.
+     * </p>
+     * 
+     * @param nextRecordNumber
+     *            the next record number
+     * @since 1.1
+     */
+    public void setNextRecordNumber(long nextRecordNumber) {
+        this.recordNumber = nextRecordNumber - 1;
+    }
+    
+    /**
+     * Sets the current position in the source stream regardless of where the parser and
lexer start reading.
+     * <p>
+     * For example: We open a file and seek to position 5434 in order to start reading at
record 42. In order to have
+     * the parser assign the correct characterPosition to records, we call this method.
+     * </p>
+     * <p>
+     * If you want the records to also have the correct record numbers, call {@link #setNextRecordNumber(long)}
+     * </p>
+     * 
+     * @param position
+     *            the new character position
+     * @since 1.1
+     */
+    public void setNextCharacterPosition(long position) {
+        this.characterOffset = position - lexer.getCharacterPosition();
+    }
+    
+    /**
      * Returns the current record number in the input stream.
      *
      * <p>
@@ -445,6 +488,7 @@ public final class CSVParser implements 
         CSVRecord result = null;
         this.record.clear();
         StringBuilder sb = null;
+        final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
         do {
             this.reusableToken.reset();
             this.lexer.nextToken(this.reusableToken);
@@ -480,7 +524,7 @@ public final class CSVParser implements 
             this.recordNumber++;
             final String comment = sb == null ? null : sb.toString();
             result = new CSVRecord(this.record.toArray(new String[this.record.size()]), this.headerMap,
comment,
-                    this.recordNumber);
+                    this.recordNumber, startCharPosition);
         }
         return result;
     }

Modified: commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java?rev=1635052&r1=1635051&r2=1635052&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java (original)
+++ commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVRecord.java Wed Oct 29
05:44:40 2014
@@ -36,6 +36,8 @@ public final class CSVRecord implements 
 
     private static final long serialVersionUID = 1L;
 
+    private final long characterPosition;
+
     /** The accumulated comments (if any) */
     private final String comment;
 
@@ -44,15 +46,16 @@ public final class CSVRecord implements 
 
     /** The record number. */
     private final long recordNumber;
-
+    
     /** The values of the record */
     private final String[] values;
 
-    CSVRecord(final String[] values, final Map<String, Integer> mapping, final String
comment, final long recordNumber) {
+    CSVRecord(final String[] values, final Map<String, Integer> mapping, final String
comment, final long recordNumber, long characterPosition) {
         this.recordNumber = recordNumber;
         this.values = values != null ? values : EMPTY_STRING_ARRAY;
         this.mapping = mapping;
         this.comment = comment;
+        this.characterPosition = characterPosition;
     }
 
     /**
@@ -110,6 +113,16 @@ public final class CSVRecord implements 
     }
 
     /**
+     * Returns the start position of this record as a character position in the source stream.
This may or may not
+     * correspond to the byte position depending on the character set.
+     *
+     * @return the position of this record in the source stream.
+     */
+    public long getCharacterPosition() {
+        return characterPosition;
+    }
+    
+    /**
      * Returns the comment for this record, if any.
      *
      * @return the comment for this record, or null if no comment for this record is available.

Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java?rev=1635052&r1=1635051&r2=1635052&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java (original)
+++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVParserTest.java Wed Oct
29 05:44:40 2014
@@ -299,22 +299,23 @@ public class CSVParserTest {
         }
     }
 
-//    @Test
-//    public void testStartWithEmptyLinesThenHeaders() throws Exception {
-//        final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n",
"hello,\"\"\n\n\n" };
-//        final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore
empty lines
-//                { "" } };
-//        for (final String code : codes) {
-//            final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL);
-//            final List<CSVRecord> records = parser.getRecords();
-//            assertEquals(res.length, records.size());
-//            assertTrue(records.size() > 0);
-//            for (int i = 0; i < res.length; i++) {
-//                assertArrayEquals(res[i], records.get(i).values());
-//            }
-//            parser.close();
-//        }
-//    }
+    // @Test
+    // public void testStartWithEmptyLinesThenHeaders() throws Exception {
+    // final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n",
+    // "hello,\"\"\n\n\n" };
+    // final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore
empty lines
+    // { "" } };
+    // for (final String code : codes) {
+    // final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL);
+    // final List<CSVRecord> records = parser.getRecords();
+    // assertEquals(res.length, records.size());
+    // assertTrue(records.size() > 0);
+    // for (int i = 0; i < res.length; i++) {
+    // assertArrayEquals(res[i], records.get(i).values());
+    // }
+    // parser.close();
+    // }
+    // }
 
     @Test
     public void testEndOfFileBehaviorCSV() throws Exception {
@@ -475,6 +476,16 @@ public class CSVParserTest {
     }
 
     @Test
+    public void testGetRecordPositionWithCRLF() throws Exception {
+        this.validateRecordPosition(CRLF);
+    }
+
+    @Test
+    public void testGetRecordPositionWithLF() throws Exception {
+        this.validateRecordPosition(String.valueOf(LF));
+    }
+
+    @Test
     public void testGetOneLine() throws IOException {
         final CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT);
         final CSVRecord record = parser.getRecords().get(0);
@@ -902,4 +913,65 @@ public class CSVParserTest {
         parser.close();
     }
 
+    private void validateRecordPosition(final String lineSeparator) throws IOException {
+        final String nl = lineSeparator; // used as linebreak in values for better distinction
+
+        String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator +
+        // to see if recordPosition correctly points to the enclosing quote
+                "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator +
+                // unicode test... not very relevant while operating on strings instead of
bytes, but for
+                // completeness...
+                "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF";
+
+        final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator);
+        CSVParser parser = CSVParser.parse(code, format);
+
+        CSVRecord record;
+        assertEquals(0, parser.getRecordNumber());
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(1, record.getRecordNumber());
+        assertEquals(code.indexOf('a'), record.getCharacterPosition());
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(2, record.getRecordNumber());
+        assertEquals(code.indexOf('1'), record.getCharacterPosition());
+
+        assertNotNull(record = parser.nextRecord());
+        final long positionRecord3 = record.getCharacterPosition();
+        assertEquals(3, record.getRecordNumber());
+        assertEquals(code.indexOf("'A"), record.getCharacterPosition());
+        assertEquals("A" + lineSeparator + "A", record.get(0));
+        assertEquals("B" + lineSeparator + "B", record.get(1));
+        assertEquals("CC", record.get(2));
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(4, record.getRecordNumber());
+        assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(5, record.getRecordNumber());
+        assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
+
+        parser.close();
+
+        // now try to read starting at record 3
+        parser = CSVParser.parse(code.substring((int) positionRecord3), format);
+        parser.setNextRecordNumber(3);
+        parser.setNextCharacterPosition(positionRecord3);
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(3, record.getRecordNumber());
+        assertEquals(code.indexOf("'A"), record.getCharacterPosition());
+        assertEquals("A" + lineSeparator + "A", record.get(0));
+        assertEquals("B" + lineSeparator + "B", record.get(1));
+        assertEquals("CC", record.get(2));
+
+        assertNotNull(record = parser.nextRecord());
+        assertEquals(4, record.getRecordNumber());
+        assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
+        assertEquals("\u00c4", record.get(0));
+
+        parser.close();
+    }
 }

Modified: commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java
URL: http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java?rev=1635052&r1=1635051&r2=1635052&view=diff
==============================================================================
--- commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java (original)
+++ commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVRecordTest.java Wed Oct
29 05:44:40 2014
@@ -45,12 +45,12 @@ public class CSVRecordTest {
     @Before
     public void setUp() throws Exception {
         values = new String[] { "A", "B", "C" };
-        record = new CSVRecord(values, null, null, 0);
+        record = new CSVRecord(values, null, null, 0, -1);
         header = new HashMap<String, Integer>();
         header.put("first", Integer.valueOf(0));
         header.put("second", Integer.valueOf(1));
         header.put("third", Integer.valueOf(2));
-        recordWithHeader = new CSVRecord(values, header, null, 0);
+        recordWithHeader = new CSVRecord(values, header, null, 0, -1);
     }
 
     @Test



Mime
View raw message