cocoon-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From p...@apache.org
Subject svn commit: r122676 - /cocoon/branches/BRANCH_2_1_X/src/blocks/scratchpad/java/org/apache/cocoon/generation/CSVGenerator.java
Date Fri, 17 Dec 2004 20:50:36 GMT
Author: pier
Date: Fri Dec 17 12:50:34 2004
New Revision: 122676

URL: http://svn.apache.org/viewcvs?view=rev&rev=122676
Log:
A simple generator from CSV files (might be handy with database dumps, eh?)
Added:
   cocoon/branches/BRANCH_2_1_X/src/blocks/scratchpad/java/org/apache/cocoon/generation/CSVGenerator.java

Added: cocoon/branches/BRANCH_2_1_X/src/blocks/scratchpad/java/org/apache/cocoon/generation/CSVGenerator.java
Url: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/scratchpad/java/org/apache/cocoon/generation/CSVGenerator.java?view=auto&rev=122676
==============================================================================
--- (empty file)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/scratchpad/java/org/apache/cocoon/generation/CSVGenerator.java
Fri Dec 17 12:50:34 2004
@@ -0,0 +1,428 @@
+/* =============================================================================== *
+ * Copyright (C) 1999-2004, The Apache Software Foundation.   All rights reserved. *
+ *                                                                                 *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may not use *
+ * this file except in  compliance with the License.  You may obtain a copy of the *
+ * License at <http://www.apache.org/licenses/LICENSE-2.0>.                        *
+ *                                                                                 *
+ * Unless required by applicable law or agreed to in writing, software distributed *
+ * under the License  is distributed on an  "AS IS"  BASIS,  WITHOUT WARRANTIES OR *
+ * CONDITIONS  OF ANY KIND,  either express  or implied.  See the License  for the *
+ * specific language governing permissions and limitations under the License.      *
+ * =============================================================================== */
+package org.apache.cocoon.generation;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.cocoon.ProcessingException;
+import org.apache.cocoon.environment.SourceResolver;
+import org.apache.excalibur.source.Source;
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * <p>A simple parser converting a Comma Separated Values (CSV) file into XML.</p>
+ * 
+ * <p>This parser is controlled by the following sitemap parameters:</p>
+ * 
+ * <ul>
+ *   <li>
+ *     <b>process-headers</b>: whether the first line in the CSV is considered
+ *     to be the header defining column names (the resulting output will be
+ *     different if this is <i>true</i> or <i>false</i>) (default:
+ *     <i>false</i>).
+ *   </li>
+ *   <li>
+ *     <b>encoding</b>: the character encoding (UTF-8, ISO8859-1, ...) used to
+ *     interpret the input CSV source file (default: <i>system default</i>).
+ *   </li>
+ *   <li>
+ *     <b>separator</b>: the field-separator character in the CSV file (comma,
+ *     tab, ...) (default: <i>,</i> <small>comma</small>).
+ *   </li>
+ *   <li>
+ *     <b>escape</b>: the character used to escape fields, or part of them, in
+ *     the CSV file (default: <i>"</i> <small>quote</small>).
+ *   </li>
+ *   <li>
+ *     <b>buffer-size</b>: the size of the buffer used for reading the source
+ *     CSV file (default: <i>4096 bytes</i>).
+ *   </li>
+ * </ul>
+ *
+ * <p>The generated output will look something like the following:</p>
+ * 
+ * <pre>
+ * &lt;?xml version="1.0" encoding="ISO-8859-1"?&gt;
+ * &lt;csv:document xmlns:csv="http://apache.org/cocoon/csv/1.0"&gt;
+ *   &lt;csv:header&gt;
+ *     &lt;csv:column number="1"&gt;Column A&lt;/csv:column&gt;
+ *     &lt;csv:column number="2"&gt;Column B&lt;/csv:column&gt;
+ *     &lt;csv:column number="3"&gt;Column C&lt;/csv:column&gt;
+ *   &lt;/csv:header&gt;
+ *   &lt;csv:record number="1"&gt;
+ *     &lt;csv:field number="1" column="Column A"&gt;Field A1&lt;/csv:field&gt;
+ *     &lt;csv:field number="2" column="Column B"&gt;Field B1&lt;/csv:field&gt;
+ *     &lt;csv:field number="3" column="Column C"&gt;Field C1&lt;/csv:field&gt;
+ *   &lt;/csv:record&gt;
+ *   &lt;csv:record number="2"&gt;
+ *     &lt;csv:field number="1" column="Column A"&gt;Field A2&lt;/csv:field&gt;
+ *     &lt;csv:field number="2" column="Column B"&gt;Field B2&lt;/csv:field&gt;
+ *     &lt;csv:field number="3" column="Column C"&gt;Field C2&lt;/csv:field&gt;
+ *   &lt;/csv:record&gt;
+ * &lt;/csv:document&gt;
+ * </pre>
+ *
+ * <p>Note that this generator has been thoroughly tested with CSV files generated
+ * by <a href="http://office.microsoft.com/" target="_new">Microsoft Excel</a>.
+ * Unfortunately no official CSV specification has ever been published by
+ * any standard body, so the interpretation of the format might be slightly
+ * different in cases.</p>
+ *
+ * @author <a href="mailto:pier@apache.org">Pier Fumagalli</a>
+ * @author Copyright &copy; 2000-2004 <a href="http://www.apache.org/">The Apache
+ *         Software Foundation</a>. All rights reserved.
+ */
+public class CSVGenerator extends FileGenerator {
+
+    /** <p>The namespace URI of XML generated by this instance.</p> */
+    public static final String NAMESPACE_URI = "http://apache.org/cocoon/csv/1.0";
+    /** <p>The namespace prefix of XML generated by this instance.</p> */
+    public static final String NAMESPACE_PREFIX = "csv";
+
+    /** <p>The default encoding configured in the Java VM.</p> */
+    private static final String DEFAULT_ENCODING = 
+        new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding();
+    /** <p>The default field separator character (a comma).</p> */
+    private static final String DEFAULT_SEPARATOR = ",";
+    /** <p>The default escape character (a double quote).</p> */
+    private static final String DEFAULT_ESCAPE = "\"";
+    /** <p>The default size of the buffer used to read the input.</p> */
+    private static final int DEFAULT_BUFFER_SIZE = 4096;
+    /**
+     * <p>The characters used for indenting: a newline followed by ten spaces,
+     * of which only a leading portion is emitted at each indentation level.</p>
+     */
+    private static final char INDENT_STRING[] = "\n          ".toCharArray();
+
+    /** <p>The encoding used to read the CSV resource from a stream.</p> */
+    private String encoding = DEFAULT_ENCODING;
+    /** <p>The character used to separate fields.</p> */
+    private char separator = DEFAULT_SEPARATOR.charAt(0);
+    /** <p>The character used to initiate and terminate escaped sequences.</p> */
+    private char escape = DEFAULT_ESCAPE.charAt(0);
+    /** <p>The size of the buffer used to read the input.</p> */
+    private int buffersize = DEFAULT_BUFFER_SIZE;
+    /** <p>The current field (column) number in the current record.</p> */
+    private int fieldnumber = 1;
+    /**
+     * <p>The current record (line) number in the current CSV; it is zero
+     * while the header line is being processed.</p>
+     */
+    private int recordnumber = 1;
+    /** <p>A flag indicating whether the &lt;record&gt; tag was opened.</p> */
+    private boolean openrecord = false;
+    /** <p>The character buffer for the current field.</p> */
+    private CharArrayWriter buffer = null;
+    /** <p>A map of all known columns or null if no headers are processed.</p> */
+    private Map columns = null;
+
+    /**
+     * <p>Create a new {@link CSVGenerator} instance.</p>
+     *
+     * <p>All parsing state starts out at the defaults declared on the fields
+     * of this class.</p>
+     */
+    public CSVGenerator() {
+        /* The superclass constructor is invoked implicitly */
+    }
+
+    /**
+     * <p>Recycle this component.</p>
+     *
+     * <p>After recycling the superclass, every configuration value goes back
+     * to its default and all per-document parsing state is discarded.</p>
+     */
+    public void recycle() {
+        super.recycle();
+
+        /* Per-document parsing state */
+        this.openrecord = false;
+        this.fieldnumber = 1;
+        this.recordnumber = 1;
+        this.columns = null;
+        this.buffer = null;
+
+        /* Configuration read from the sitemap parameters */
+        this.buffersize = DEFAULT_BUFFER_SIZE;
+        this.escape = DEFAULT_ESCAPE.charAt(0);
+        this.separator = DEFAULT_SEPARATOR.charAt(0);
+        this.encoding = DEFAULT_ENCODING;
+    }
+
+    /**
+     * <p>Setup this {@link CSVGenerator} instance.</p>
+     */
+    public void setup(SourceResolver resolver, Map object_model, String source,
+                      Parameters parameters)
+    throws ProcessingException, SAXException, IOException {
+        super.setup(resolver, object_model, source, parameters);
+
+        boolean header = parameters.getParameterAsBoolean("process-header", false);
+
+        this.encoding = parameters.getParameter("encoding", DEFAULT_ENCODING);
+        this.separator = parameters.getParameter("separator", DEFAULT_SEPARATOR).charAt(0);
+        this.escape = parameters.getParameter("escape", DEFAULT_ESCAPE).charAt(0);
+        this.buffersize = parameters.getParameterAsInteger("buffer-size", DEFAULT_BUFFER_SIZE);
+        this.buffer = new CharArrayWriter();
+        this.columns =  (header ? new HashMap() : null);
+        this.recordnumber = (header ? 0 : 1);
+        this.fieldnumber = 1;
+        this.openrecord = false;
+    }
+
+    /**
+     * <p>Generate the unique key.</p>
+     *
+     * <p>The key is the URI of the input source, with <code>+headers</code>
+     * appended when headers are being processed.</p>
+     *
+     * @return the key identifying the output of this generator.
+     */
+    public Serializable getKey() {
+        String uri = this.inputSource.getURI();
+        return (this.columns == null) ? uri : (uri + "+headers");
+    }
+
+    /**
+     * <p>Generate XML data from a Comma Separated Value resource.</p>
+     *
+     * <p>The source is read one character at a time: separators terminate
+     * fields, line terminators terminate records, and both are taken literally
+     * while inside a section delimited by the escape character. A field still
+     * pending when the input ends (a source whose last line has no line
+     * terminator) is emitted as well.</p>
+     *
+     * @throws IOException if the source cannot be opened or read.
+     * @throws SAXException if the content handler fails processing an event.
+     */
+    public void generate()
+    throws IOException, SAXException, ProcessingException {
+
+        /* Create a new Reader correctly decoding the source stream */
+        CSVReader csv = new CSVReader(this.inputSource, this.encoding, this.buffersize);
+
+        try {
+            /* Start the document */
+            this.contentHandler.setDocumentLocator(csv);
+            this.contentHandler.startDocument();
+            this.contentHandler.startPrefixMapping(NAMESPACE_PREFIX, NAMESPACE_URI);
+            this.indent(0);
+            this.startElement("document");
+
+            /* Allocate buffer and status for parsing */
+            boolean unescaped = true;
+            int prev = -1;
+            int curr = -1;
+
+            /* Parse the file reading characters one-by-one */
+            while ((curr = csv.read()) >= 0) {
+
+                /* Process any occurrence of the escape character */
+                if (curr == this.escape) {
+                    /*
+                     * An escape character immediately following the one that
+                     * closed an escaped section is a doubled escape: it stands
+                     * for one literal escape character and reopens the section.
+                     */
+                    if ((unescaped) && (prev == this.escape)) {
+                        this.buffer.write(this.escape);
+                    }
+                    unescaped = ! unescaped;
+                    prev = curr;
+                    continue;
+                }
+
+                /* Process any occurrence of the field separator */
+                if ((unescaped) && (curr == this.separator)) {
+                    this.dumpField();
+                    prev = curr;
+                    continue;
+                }
+
+                /* Process newline characters */
+                if ((unescaped) && ((curr == '\r') || (curr == '\n'))) {
+                    this.dumpField();
+                    this.dumpRecord();
+
+                    /* Record numbering: the LF of a CR+LF pair is not counted twice */
+                    if (((curr == '\n') && (prev != '\r')) || (curr == '\r')) {
+                        this.recordnumber ++;
+                    }
+
+                    /* Nothing else to do */
+                    prev = curr;
+                    continue;
+                }
+
+                /* Any other character simply gets added to the buffer */
+                this.buffer.write(curr);
+                prev = curr;
+            }
+
+            /*
+             * Flush the field still sitting in the buffer when the source does
+             * not end with a line terminator (otherwise it would be silently
+             * dropped), then terminate any hanging open record element.
+             */
+            this.dumpField();
+            this.dumpRecord();
+
+            /* Terminate the document */
+            this.indent(0);
+            this.endElement("document");
+            this.contentHandler.endPrefixMapping(NAMESPACE_PREFIX);
+            this.contentHandler.endDocument();
+
+        } finally {
+            csv.close();
+        }
+    }
+
+    
+    private void dumpField()
+    throws SAXException {
+        if (this.buffer.size() < 1) {
+            this.fieldnumber ++;
+            return;
+        }
+
+        if (! this.openrecord) {
+            this.indent(4);
+
+            if (this.recordnumber > 0) {
+                AttributesImpl attributes = new AttributesImpl();
+                String value = Integer.toString(this.recordnumber);
+                attributes.addAttribute("", "number", "number", "CDATA", value);
+                this.startElement("record", attributes);
+            } else {
+                this.startElement("header");
+            }
+            this.openrecord = true;
+        }
+
+        /* Enclode the field in the proper element */
+        String element = "field";
+        char array[] = this.buffer.toCharArray();
+        this.indent(8);
+
+        AttributesImpl attributes = new AttributesImpl();
+        String value = Integer.toString(this.fieldnumber);
+        attributes.addAttribute("", "number", "number", "CDATA", value);
+
+        if (this.recordnumber < 1) {
+            this.columns.put(new Integer(this.fieldnumber), new String(array));
+            element = "column";
+        } else if (this.columns != null) {
+            String header = (String) this.columns.get(new Integer(this.fieldnumber));
+            if (header != null) {
+                attributes.addAttribute("", "column", "column", "CDATA", header);
+            }
+        }
+
+        this.startElement(element, attributes);
+        this.contentHandler.characters(array, 0, array.length);
+        this.endElement(element);
+        this.buffer.reset();
+
+        this.fieldnumber ++;
+    }
+
+    /**
+     * <p>Close the <code>record</code> (or <code>header</code>) element if one
+     * is currently open, and restart field numbering from one.</p>
+     *
+     * @throws SAXException if the content handler fails processing an event.
+     */
+    private void dumpRecord()
+    throws SAXException {
+        /* Field numbering restarts regardless of whether anything was open */
+        if (! this.openrecord) {
+            this.fieldnumber = 1;
+            return;
+        }
+
+        this.indent(4);
+        /* Record number zero (or less) identifies the header line */
+        this.endElement((this.recordnumber > 0) ? "record" : "header");
+        this.openrecord = false;
+        this.fieldnumber = 1;
+    }
+
+    /**
+     * <p>Emit a newline followed by <code>level</code> spaces as character
+     * data.</p>
+     *
+     * @param level the number of spaces following the newline.
+     * @throws SAXException if the content handler fails processing the event.
+     */
+    private void indent(int level)
+    throws SAXException {
+        /* The newline opening the indent string always gets emitted */
+        int length = level + 1;
+        this.contentHandler.characters(INDENT_STRING, 0, length);
+    }
+
+    /**
+     * <p>Start an element without attributes in the CSV namespace.</p>
+     *
+     * @param name the local name of the element.
+     * @throws SAXException if the content handler fails processing the event.
+     */
+    private void startElement(String name)
+    throws SAXException {
+        /* Missing attributes are replaced by an empty set down the line */
+        this.startElement(name, (Attributes) null);
+    }
+
+    /**
+     * <p>Start an element in the CSV namespace.</p>
+     *
+     * @param name the local name of the element.
+     * @param atts the attributes of the element, or <b>null</b> for none.
+     * @throws NullPointerException if the name is <b>null</b>.
+     * @throws SAXException if the content handler fails processing the event.
+     */
+    private void startElement(String name, Attributes atts)
+    throws SAXException {
+        if (name == null) {
+            throw new NullPointerException("Null name");
+        }
+
+        Attributes attributes = (atts != null) ? atts : new AttributesImpl();
+        String qualified = NAMESPACE_PREFIX + ':' + name;
+        this.contentHandler.startElement(NAMESPACE_URI, name, qualified, attributes);
+    }
+
+    /**
+     * <p>End an element in the CSV namespace.</p>
+     *
+     * @param name the local name of the element.
+     * @throws SAXException if the content handler fails processing the event.
+     */
+    private void endElement(String name)
+    throws SAXException {
+        this.contentHandler.endElement(NAMESPACE_URI, name,
+                                       NAMESPACE_PREFIX + ':' + name);
+    }
+
+    /**
+     * <p>A {@link Reader} decoding the characters of a {@link Source} that also
+     * acts as the SAX {@link Locator} reporting how far the source was read.</p>
+     */
+    private static final class CSVReader extends Reader implements Locator {
+
+        /** <p>The URI of the source, reported as the system identifier.</p> */
+        private String uri = null;
+        /** <p>The buffered reader decoding the source stream.</p> */
+        private Reader input = null;
+        /** <p>The column (starting from one) of the next character to read.</p> */
+        private int column = 1;
+        /** <p>The line (starting from one) of the next character to read.</p> */
+        private int line = 1;
+        /** <p>The last character read, used to recognize CR+LF pairs.</p> */
+        private int last = -1;
+
+        /**
+         * <p>Create a new {@link CSVReader} over the specified source.</p>
+         *
+         * @param source the source to read.
+         * @param encoding the name of the character encoding of the source.
+         * @param buffer the size of the read buffer.
+         * @throws IOException if the source can not be opened or the encoding
+         *                     is not supported.
+         */
+        private CSVReader(Source source, String encoding, int buffer)
+        throws IOException {
+            InputStream stream = source.getInputStream();
+            boolean ready = false;
+            try {
+                Reader reader = new InputStreamReader(stream, encoding);
+                this.input = new BufferedReader(reader, buffer);
+                this.uri = source.getURI();
+                ready = true;
+            } finally {
+                /* Do not leak the stream on unsupported encodings or bad sizes */
+                if (! ready) {
+                    try {
+                        stream.close();
+                    } catch (IOException ignored) {
+                        /* The failure that brought us here is the one to report */
+                    }
+                }
+            }
+        }
+
+        public String getPublicId() {
+            return null;
+        }
+
+        public String getSystemId() {
+            return this.uri;
+        }
+
+        public int getLineNumber() {
+            return this.line;
+        }
+
+        public int getColumnNumber() {
+            return this.column;
+        }
+
+        public void close()
+        throws IOException {
+            this.input.close();
+        }
+
+        /**
+         * <p>Read a single character, keeping track of lines and columns.</p>
+         *
+         * @return the character read, or a negative value at the end of input.
+         */
+        public int read()
+        throws IOException {
+            int c = this.input.read();
+            if (c < 0) return c;
+
+            if ((c == '\r') || ((c == '\n') && (this.last != '\r'))) {
+                /* A line ends with a CR, or with a LF not completing a CR+LF */
+                this.column = 1;
+                this.line ++;
+            } else if (c != '\n') {
+                /* The LF of a CR+LF pair was already accounted for by its CR */
+                this.column ++;
+            }
+
+            this.last = c;
+            return c;
+        }
+
+        /**
+         * <p>Read characters into a portion of an array, going through
+         * {@link #read()} so that lines and columns are tracked.</p>
+         *
+         * @param b the destination array.
+         * @param o the offset at which to start storing characters.
+         * @param l the maximum number of characters to read.
+         * @return the number of characters read, or -1 at the end of input.
+         */
+        public int read(char b[], int o, int l)
+        throws IOException {
+            if (b == null) throw new NullPointerException();
+            if ((o<0)||(o>b.length)||(l<0)||((o+l)>b.length)||((o+l)<0)) {
+                throw new IndexOutOfBoundsException();
+            }
+            if (l == 0) return 0;
+
+            /* A failure reading the very first character is always reported */
+            int c = read();
+            if (c == -1) return -1;
+            b[o] = (char)c;
+
+            int i = 1;
+            try {
+                for (i = 1; i < l ; i++) {
+                    c = read();
+                    if (c == -1) break;
+                    b[o + i] = (char)c;
+                }
+            } catch (IOException ignored) {
+                /*
+                 * Deliberate: hand back the characters read so far, the same
+                 * way java.io.InputStream.read(byte[], int, int) treats
+                 * failures after the first element.
+                 */
+                return i;
+            }
+            return i;
+        }
+    }
+}

Mime
View raw message