jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [1/4] git commit: Rework CSVParser wrapper and make setup RFC compliant.
Date Sat, 04 Oct 2014 14:29:18 GMT
Repository: jena
Updated Branches:
  refs/heads/jena-csv da76ae38c -> 468538e1b


Rework CSVParser wrapper and make setup RFC compliant.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/3db6a8a5
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/3db6a8a5
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/3db6a8a5

Branch: refs/heads/jena-csv
Commit: 3db6a8a5f7877537662bb6ef5ca45e819fd987d6
Parents: da76ae3
Author: Andy Seaborne <andy@seaborne.org>
Authored: Fri Oct 3 21:47:44 2014 +0100
Committer: Andy Seaborne <andy@seaborne.org>
Committed: Fri Oct 3 21:47:44 2014 +0100

----------------------------------------------------------------------
 .../hp/hpl/jena/sparql/resultset/CSVInput.java  |  1 -
 .../org/apache/jena/atlas/csv/CSVParser.java    | 67 +++++++++-----------
 .../apache/jena/atlas/csv/TestCSVParser.java    |  4 +-
 3 files changed, 33 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/3db6a8a5/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java
index db5f3f3..abfeeb5 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/CSVInput.java
@@ -117,7 +117,6 @@ public class CSVInput
             FmtLog.warn(log, "Boolean result variable is '%s', not '_askResult'", vars.get(0).getName()) ;
         }
         
-        
         List<String> line = parser.parse1() ;
         if ( line.size() != 1 ) {
             throw new ARQException("CSV Boolean Results malformed: data line='"+line+"'") ;

http://git-wip-us.apache.org/repos/asf/jena/blob/3db6a8a5/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java b/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
index 88b56c5..83613ae 100644
--- a/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
+++ b/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
@@ -18,25 +18,25 @@
 
 package org.apache.jena.atlas.csv ;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import java.io.IOException ;
+import java.io.InputStream ;
+import java.io.Reader ;
+import java.util.Iterator ;
+import java.util.List ;
 
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVRecord;
-import org.apache.jena.atlas.io.IO;
+import org.apache.commons.csv.CSVFormat ;
+import org.apache.commons.csv.CSVRecord ;
+import org.apache.jena.atlas.io.IO ;
+import org.apache.jena.atlas.iterator.Iter ;
+import org.apache.jena.atlas.iterator.Transform ;
 
-/** Written specifically to handle SPARQL results CSV files.
- *  Acts as a wrapper for Commons CSV parser.
+/** 
+ *  Wrapper for Commons CSV parser.
  */
 public class CSVParser implements Iterable<List<String>>
 {
-    
     private final org.apache.commons.csv.CSVParser parser;
+    private final Iterator<CSVRecord> iterator ;
     
     public static CSVParser create(String filename) {
         InputStream input = IO.openFile(filename) ;
@@ -44,49 +44,44 @@ public class CSVParser implements Iterable<List<String>>
     }
 
     public static CSVParser create(InputStream input) {
-        CSVParser parser = new CSVParser(new InputStreamReader(input)) ;
+        CSVParser parser = new CSVParser(IO.asBufferedUTF8(input)) ;
         return parser ; 
     }
-    
+
+    /** Be careful about charsets */
     public static CSVParser create(Reader input) {
         CSVParser parser = new CSVParser(input) ;
         return parser ; 
     }
 
-    public CSVParser(Reader input) {
+    private CSVParser(Reader input) {
         try {
-            this.parser = CSVFormat.EXCEL.withQuote('\'').parse(input);
+            this.parser = CSVFormat.RFC4180.parse(input);
+            this.iterator = parser.iterator() ;
         } catch (IOException e) {
             throw new CSVParseException("Failed to create the CSV parser: " + e.getMessage(), e);
         }
     }
     
+    private static Transform<CSVRecord, List<String>> transform = new Transform<CSVRecord, List<String>>() {
+        @Override
+        public List<String> convert(CSVRecord record) {
+            return recordToList(record) ;
+        }
+    } ;
+    
     @Override
     public Iterator<List<String>> iterator() {
-        List<List<String>> list = new ArrayList<>();
-        for (CSVRecord record : parser) {
-            List<String> row = new ArrayList<>();
-            for (String columnValue : record) {
-                row.add(columnValue);
-            }
-            list.add(row);
-        }
-        return list.iterator();
+        return Iter.map(iterator, transform) ;
     }
 
     public List<String> parse1() {
-        Iterator<List<String>> iterator = iterator();
-        if (iterator.hasNext()) 
-        {
-            final List<String> firstRow = iterator.next();
-            return firstRow;
-        }
+        if (iterator.hasNext())
+             return recordToList(iterator.next()) ;
         return null;
     }
 
-    static void exception(String msg, long line, long col) {
-        if ( line >= 0 && col > 0 )
-            msg = String.format("[%s, %s] %s", line, col, msg) ;
-        throw new CSVParseException(msg) ;
+    private static List<String> recordToList(CSVRecord record) {
+        return Iter.toList(record.iterator()) ;
     }
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/3db6a8a5/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java b/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java
index de66fdb..7574ea6 100644
--- a/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java
+++ b/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java
@@ -41,7 +41,7 @@ public class TestCSVParser extends BaseTest
     @Test public void csv_parse_07() { csv(",,\n", new String[][] {{"", "", ""}}) ; }
     
     @Test public void csv_parse_10() { csv("\n\n", new String[][] { {""}, {""} }) ; }
-    @Test public void csv_parse_11() { csv("'aa'\naa\n", new String[][] { {"aa"}, {"aa"} }) ; }
+    @Test public void csv_parse_11() { csv("'aa'\naa\n", new String[][] { {"'aa'"}, {"aa"} }) ; }
     @Test public void csv_parse_12() { csv("\naa", new String[][] { {""}, {"aa"} }) ; }
     @Test public void csv_parse_13() { csv("a,b\nc,d", new String[][] { {"a", "b"}, {"c", "d"} }) ; }
     @Test public void csv_parse_14() { csv("a,b\rc,d", new String[][] { {"a", "b"}, {"c", "d"} }) ; }
@@ -63,7 +63,7 @@ public class TestCSVParser extends BaseTest
     private static void csv(String input, List<List<String>> answers)
     {
         List<List<String>> x = new ArrayList<>() ;
-        CSVParser parser = new CSVParser(new StringReader(input)) ;
+        CSVParser parser = CSVParser.create(new StringReader(input)) ;
         for (List<String> row : parser) {
             x.add(row) ;
         }


Mime
View raw message