Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 0F02F200B12 for ; Sun, 29 May 2016 04:49:09 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 0D916160A34; Sun, 29 May 2016 02:49:09 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id AF241160A3D for ; Sun, 29 May 2016 04:49:06 +0200 (CEST) Received: (qmail 35225 invoked by uid 500); 29 May 2016 02:49:05 -0000 Mailing-List: contact notifications-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list notifications@commons.apache.org Received: (qmail 35216 invoked by uid 99); 29 May 2016 02:49:05 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 29 May 2016 02:49:05 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id 4E989C1FC9 for ; Sun, 29 May 2016 02:49:05 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -0.426 X-Spam-Level: X-Spam-Status: No, score=-0.426 tagged_above=-999 required=6.31 tests=[KAM_LAZY_DOMAIN_SECURITY=1, RP_MATCHES_RCVD=-1.426] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id Ya_YrANj7t40 for ; Sun, 29 May 2016 02:49:00 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx1-lw-us.apache.org (ASF Mail Server at mx1-lw-us.apache.org) with ESMTP id 
3C18C5FD49 for ; Sun, 29 May 2016 02:48:50 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 3BA35E59EA for ; Sun, 29 May 2016 02:48:47 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 2A47A3A0113 for ; Sun, 29 May 2016 02:48:47 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r989467 [40/43] - in /websites/production/commons/content/proper/commons-csv/archives/1.4: ./ apidocs/ apidocs/org/ apidocs/org/apache/ apidocs/org/apache/commons/ apidocs/org/apache/commons/csv/ apidocs/org/apache/commons/csv/class-use/ ap... Date: Sun, 29 May 2016 02:48:45 -0000 To: notifications@commons.apache.org From: ggregory@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20160529024847.2A47A3A0113@svn01-us-west.apache.org> archived-at: Sun, 29 May 2016 02:49:09 -0000 Added: websites/production/commons/content/proper/commons-csv/archives/1.4/xref/org/apache/commons/csv/CSVParser.html ============================================================================== --- websites/production/commons/content/proper/commons-csv/archives/1.4/xref/org/apache/commons/csv/CSVParser.html (added) +++ websites/production/commons/content/proper/commons-csv/archives/1.4/xref/org/apache/commons/csv/CSVParser.html Sun May 29 02:48:43 2016 @@ -0,0 +1,551 @@ + + + +CSVParser xref + + + +
+1   /*
+2    * Licensed to the Apache Software Foundation (ASF) under one or more
+3    * contributor license agreements.  See the NOTICE file distributed with
+4    * this work for additional information regarding copyright ownership.
+5    * The ASF licenses this file to You under the Apache License, Version 2.0
+6    * (the "License"); you may not use this file except in compliance with
+7    * the License.  You may obtain a copy of the License at
+8    *
+9    *      http://www.apache.org/licenses/LICENSE-2.0
+10   *
+11   * Unless required by applicable law or agreed to in writing, software
+12   * distributed under the License is distributed on an "AS IS" BASIS,
+13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+14   * See the License for the specific language governing permissions and
+15   * limitations under the License.
+16   */
+17  
+18  package org.apache.commons.csv;
+19  
+20  import java.io.Closeable;
+21  import java.io.File;
+22  import java.io.FileInputStream;
+23  import java.io.IOException;
+24  import java.io.InputStreamReader;
+25  import java.io.Reader;
+26  import java.io.StringReader;
+27  import java.net.URL;
+28  import java.nio.charset.Charset;
+29  import java.util.ArrayList;
+30  import java.util.Arrays;
+31  import java.util.Iterator;
+32  import java.util.LinkedHashMap;
+33  import java.util.List;
+34  import java.util.Map;
+35  import java.util.NoSuchElementException;
+36  import java.util.TreeMap;
+37  
+38  import static org.apache.commons.csv.Token.Type.*;
+39  
+40  /**
+41   * Parses CSV files according to the specified format.
+42   *
+43   * Because CSV appears in many different dialects, the parser supports many formats by allowing the
+44   * specification of a {@link CSVFormat}.
+45   *
+46   * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
+47   *
+48   * <h2>Creating instances</h2>
+49   * <p>
+50   * There are several static factory methods that can be used to create instances for various types of resources:
+51   * </p>
+52   * <ul>
+53   *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
+54   *     <li>{@link #parse(String, CSVFormat)}</li>
+55   *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
+56   * </ul>
+57   * <p>
+58   * Alternatively parsers can also be created by passing a {@link Reader} directly to one of the constructors.
+59   *
+60   * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
+61   * </p>
+62   * <pre>
+63   * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
+64   *     ...
+65   * }
+66   * </pre>
+67   *
+68   * <h2>Parsing record wise</h2>
+69   * <p>
+70   * To parse a CSV input from a file, you write:
+71   * </p>
+72   *
+73   * <pre>
+74   * File csvData = new File(&quot;/path/to/csv&quot;);
+75   * CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.RFC4180);
+76   * for (CSVRecord csvRecord : parser) {
+77   *     ...
+78   * }
+79   * </pre>
+80   *
+81   * <p>
+82   * This will read and parse the contents of the file using the
+83   * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
+84   * </p>
+85   *
+86   * <p>
+87   * To parse CSV input in a format like Excel, you write:
+88   * </p>
+89   *
+90   * <pre>
+91   * CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.EXCEL);
+92   * for (CSVRecord csvRecord : parser) {
+93   *     ...
+94   * }
+95   * </pre>
+96   *
+97   * <p>
+98   * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
+99   * customising CSVFormats is available in {@link CSVFormat CSVFormat JavaDoc}.
+100  * </p>
+101  *
+102  * <h2>Parsing into memory</h2>
+103  * <p>
+104  * If parsing record wise is not desired, the contents of the input can be read completely into memory.
+105  * </p>
+106  *
+107  * <pre>
+108  * Reader in = new StringReader(&quot;a;b\nc;d&quot;);
+109  * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
+110  * List&lt;CSVRecord&gt; list = parser.getRecords();
+111  * </pre>
+112  *
+113  * <p>
+114  * There are two constraints that have to be kept in mind:
+115  * </p>
+116  *
+117  * <ol>
+118  *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
+119  *     the input, those records will not end up in the in memory representation of your CSV data.</li>
+120  *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
+121  *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
+122  * </ol>
+123  *
+124  * <h2>Notes</h2>
+125  * <p>
+126  * Internal parser state is completely covered by the format and the reader-state.
+127  * </p>
+128  *
+129  * @version $Id: CSVParser.java 1743529 2016-05-12 17:02:05Z ggregory $
+130  *
+131  * @see <a href="package-summary.html">package documentation for more details</a>
+132  */
+133 public final class CSVParser implements Iterable<CSVRecord>, Closeable {
+134 
+135     /**
+136      * Creates a parser for the given {@link File}.
+137      *
+138      * <p><strong>Note:</strong> This method opens the file with a {@link FileInputStream} and wraps it in an
+139      * {@link InputStreamReader} that decodes bytes with the given {@code charset}. Only {@code file} and
+140      * {@code format} are null-checked here; {@code charset} is handed to the reader unchecked, unlike in
+141      * {@link #parse(URL, Charset, CSVFormat)}.</p>
+142      *
+143      * @param file
+144      *            a CSV file. Must not be null.
+145      * @param charset
+146      *            the charset used to decode the file's bytes. NOTE(review): presumably must not be null - confirm.
+147      * @param format
+148      *            the CSVFormat used for CSV parsing. Must not be null.
+149      * @return a new parser
+150      * @throws IllegalArgumentException
+151      *             If the parameters of the format are inconsistent or if either file or format are null.
+152      * @throws IOException
+153      *             If an I/O error occurs. NOTE(review): the opened stream is not closed here if construction fails.
+154      */
+155     public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
+156         Assertions.notNull(file, "file");
+157         Assertions.notNull(format, "format");
+158         return new CSVParser(new InputStreamReader(new FileInputStream(file), charset), format);
+159     }
+160 
+161     /**
+162      * Creates a parser that reads CSV data from the given {@link String} through a {@link StringReader}.
+163      *
+164      * @param string
+165      *            a CSV string. Must not be null.
+166      * @param format
+167      *            the CSVFormat used for CSV parsing. Must not be null.
+168      * @return a new parser
+169      * @throws IllegalArgumentException
+170      *             If the parameters of the format are inconsistent or if either string or format are null.
+171      * @throws IOException
+172      *             If an I/O error occurs while the constructor reads the header or skips the first record
+173      */
+174     public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
+175         Assertions.notNull(string, "string");
+176         Assertions.notNull(format, "format");
+177 
+178         return new CSVParser(new StringReader(string), format);
+179     }
+180 
+181     /**
+182      * Creates a parser for the given URL, reading {@code url.openStream()} decoded with {@code charset}.
+183      *
+184      * <p>
+185      * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
+186      * you close the {@code url}.
+187      * </p>
+188      *
+189      * @param url
+190      *            a URL. Must not be null.
+191      * @param charset
+192      *            the charset for the resource. Must not be null.
+193      * @param format
+194      *            the CSVFormat used for CSV parsing. Must not be null.
+195      * @return a new parser
+196      * @throws IllegalArgumentException
+197      *             If the parameters of the format are inconsistent or if either url, charset or format are null.
+198      * @throws IOException
+199      *             If an I/O error occurs, including while opening the URL's stream
+200      */
+201     public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
+202         Assertions.notNull(url, "url");
+203         Assertions.notNull(charset, "charset");
+204         Assertions.notNull(format, "format");
+205 
+206         return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
+207     }
+208 
+209     // the following objects are shared across nextRecord() calls to reduce garbage
+210 
+211     private final CSVFormat format; // format passed to the constructor; read for trim, null-string and header settings
+212 
+213     /** A mapping of column names to column indices; null when the format defines no header */
+214     private final Map<String, Integer> headerMap;
+215 
+216     private final Lexer lexer; // tokenizer built in the constructor over an ExtendedBufferedReader
+217 
+218     /** A record buffer for getRecord(). Grows as necessary and is reused. */
+219     private final List<String> record = new ArrayList<String>();
+220 
+221     /**
+222      * The number of the most recently created record; nextRecord() increments it before assigning it to a record.
+223      */
+224     private long recordNumber;
+225 
+226     /**
+227      * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
+228      * with {@link #recordNumber}.
+229      */
+230     private final long characterOffset;
+231 
+232     private final Token reusableToken = new Token(); // single token instance, reset before every lexer call
+233 
+234     /**
+235      * Creates a parser using the given {@link CSVFormat}, with a character offset of 0 and a first record number of 1.
+236      *
+237      * <p>
+238      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
+239      * unless you close the {@code reader}.
+240      * </p>
+241      *
+242      * @param reader
+243      *            a Reader containing CSV-formatted input. Must not be null.
+244      * @param format
+245      *            the CSVFormat used for CSV parsing. Must not be null.
+246      * @throws IllegalArgumentException
+247      *             If the parameters of the format are inconsistent or if either reader or format are null.
+248      * @throws IOException
+249      *             If there is a problem reading the header or skipping the first record
+250      */
+251     public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
+252         this(reader, format, 0, 1); // delegate: offset 0, numbering starts at 1
+253     }
+254 
+255     /**
+256      * Creates a parser using the given {@link CSVFormat}, an explicit character offset and a starting record number.
+257      *
+258      * <p>
+259      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
+260      * unless you close the {@code reader}.
+261      * </p>
+262      *
+263      * @param reader
+264      *            a Reader containing CSV-formatted input. Must not be null.
+265      * @param format
+266      *            the CSVFormat used for CSV parsing. Must not be null.
+267      * @param characterOffset
+268      *            Lexer offset when the parser does not start parsing at the beginning of the source.
+269      * @param recordNumber
+270      *            The number given to the first record returned after construction
+271      * @throws IllegalArgumentException
+272      *             If the parameters of the format are inconsistent or if either reader or format are null.
+273      * @throws IOException
+274      *             If there is a problem reading the header or skipping the first record
+275      * @since 1.1
+276      */
+277     public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
+278             throws IOException {
+279         Assertions.notNull(reader, "reader");
+280         Assertions.notNull(format, "format");
+281 
+282         this.format = format;
+283         this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
+284         this.headerMap = this.initializeHeader(); // may call nextRecord() before the two fields below are assigned
+285         this.characterOffset = characterOffset;
+286         this.recordNumber = recordNumber - 1; // nextRecord() pre-increments; overwrites any count from header reading
+287     }
+288 
+289     private void addRecordValue(final boolean lastRecord) { // appends the current token's text to the record buffer
+290         final String input = this.reusableToken.content.toString();
+291         final String inputClean = this.format.getTrim() ? input.trim() : input; // trim only when the format asks
+292         if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) {
+293             return; // drop the empty value that follows a trailing delimiter at the end of a record
+294         }
+295         final String nullString = this.format.getNullString();
+296         this.record.add(inputClean.equals(nullString) ? null : inputClean); // values equal to the null string become null
+297     }
+298 
+299     /**
+300      * Closes the lexer held by this parser, if there is one.
+301      *
+302      * @throws IOException
+303      *             If an I/O error occurs
+304      */
+305     @Override
+306     public void close() throws IOException {
+307         if (this.lexer != null) {
+308             this.lexer.close();
+309         }
+310     }
+311 
+312     /**
+313      * Returns the current line number in the input stream, as reported by the lexer.
+314      *
+315      * <p>
+316      * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
+317      * the record number.
+318      * </p>
+319      *
+320      * @return current line number
+321      */
+322     public long getCurrentLineNumber() {
+323         return this.lexer.getCurrentLineNumber();
+324     }
+325 
+326     /**
+327      * Returns a copy of the header map that iterates in column order, or {@code null} if there is no header map.
+328      * <p>
+329      * The map keys are column names. The map values are 0-based indices.
+330      * </p>
+331      * @return a new {@link LinkedHashMap} copy of the header map, or {@code null} if no header map was initialized.
+332      */
+333     public Map<String, Integer> getHeaderMap() {
+334         return this.headerMap == null ? null : new LinkedHashMap<String, Integer>(this.headerMap);
+335     }
+336 
+337     /**
+338      * Returns the current record number, i.e. the number assigned to the most recently parsed record.
+339      *
+340      * <p>
+341      * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
+342      * the line number.
+343      * </p>
+344      *
+345      * @return current record number
+346      */
+347     public long getRecordNumber() {
+348         return this.recordNumber;
+349     }
+350 
+351     /**
+352      * Parses the remaining CSV input according to the given format and returns the content as a list of
+353      * {@link CSVRecord CSVRecords}, collected by calling {@link #nextRecord()} until it returns {@code null}.
+354      *
+355      * <p>
+356      * The returned content starts at the current parse-position in the stream.
+357      * </p>
+358      *
+359      * @return list of {@link CSVRecord CSVRecords}, may be empty
+360      * @throws IOException
+361      *             on parse error or input read-failure
+362      */
+363     public List<CSVRecord> getRecords() throws IOException {
+364         CSVRecord rec;
+365         final List<CSVRecord> records = new ArrayList<CSVRecord>();
+366         while ((rec = this.nextRecord()) != null) {
+367             records.add(rec);
+368         }
+369         return records;
+370     }
+371 
+372     /**
+373      * Initializes the name to index mapping if the format defines a header; called from the constructor.
+374      *
+375      * @return null if the format has no header.
+376      * @throws IOException if there is a problem reading the header or skipping the first record
+377      */
+378     private Map<String, Integer> initializeHeader() throws IOException {
+379         Map<String, Integer> hdrMap = null;
+380         final String[] formatHeader = this.format.getHeader();
+381         if (formatHeader != null) {
+382             hdrMap = this.format.getIgnoreHeaderCase() ?
+383                     new TreeMap<String, Integer>(String.CASE_INSENSITIVE_ORDER) :
+384                     new LinkedHashMap<String, Integer>();
+385 
+386             String[] headerRecord = null;
+387             if (formatHeader.length == 0) {
+388                 // an empty header array means: take the names from the first record of the input
+389                 final CSVRecord nextRecord = this.nextRecord();
+390                 if (nextRecord != null) {
+391                     headerRecord = nextRecord.values();
+392                 }
+393             } else {
+394                 if (this.format.getSkipHeaderRecord()) {
+395                     this.nextRecord(); // consume and discard the first record of the input
+396                 }
+397                 headerRecord = formatHeader;
+398             }
+399 
+400             // build the name to index mappings; headerRecord stays null when the input had no first record
+401             if (headerRecord != null) {
+402                 for (int i = 0; i < headerRecord.length; i++) {
+403                     final String header = headerRecord[i];
+404                     final boolean containsHeader = hdrMap.containsKey(header); // NOTE(review): null name vs. TreeMap comparator - confirm
+405                     final boolean emptyHeader = header == null || header.trim().isEmpty();
+406                     if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) { // repeated blank names pass only if allowed
+407                         throw new IllegalArgumentException("The header contains a duplicate name: \"" + header +
+408                                 "\" in " + Arrays.toString(headerRecord));
+409                     }
+410                     hdrMap.put(header, Integer.valueOf(i)); // a repeated (allowed) blank name maps to its last index
+411                 }
+412             }
+413         }
+414         return hdrMap;
+415     }
+416 
+417     /**
+418      * Gets whether this parser is closed, as reported by the lexer.
+419      *
+420      * @return whether this parser is closed.
+421      */
+422     public boolean isClosed() {
+423         return this.lexer.isClosed();
+424     }
+425 
+426     /**
+427      * Returns an iterator on the records that pulls from this parser's current read position.
+428      *
+429      * <p>IOExceptions occurring during the iteration are wrapped in a
+430      * RuntimeException. If the parser is closed, {@code hasNext()} returns false
+431      * and a call to {@code next()} will throw a NoSuchElementException.</p>
+432      * @return a new iterator backed by {@link #nextRecord()}; {@code remove()} is unsupported.
+433      */
+434     @Override
+435     public Iterator<CSVRecord> iterator() {
+436         return new Iterator<CSVRecord>() {
+437             private CSVRecord current; // record fetched by hasNext() and not yet handed out by next()
+438 
+439             private CSVRecord getNextRecord() {
+440                 try {
+441                     return CSVParser.this.nextRecord();
+442                 } catch (final IOException e) {
+443                     // TODO: This is not great, throw an ISE instead?
+444                     throw new RuntimeException(e);
+445                 }
+446             }
+447 
+448             @Override
+449             public boolean hasNext() {
+450                 if (CSVParser.this.isClosed()) {
+451                     return false;
+452                 }
+453                 if (this.current == null) {
+454                     this.current = this.getNextRecord(); // look ahead once; kept until next() consumes it
+455                 }
+456 
+457                 return this.current != null;
+458             }
+459 
+460             @Override
+461             public CSVRecord next() {
+462                 if (CSVParser.this.isClosed()) {
+463                     throw new NoSuchElementException("CSVParser has been closed");
+464                 }
+465                 CSVRecord next = this.current;
+466                 this.current = null; // the buffered record is handed out exactly once
+467 
+468                 if (next == null) {
+469                     // hasNext() wasn't called before
+470                     next = this.getNextRecord();
+471                     if (next == null) {
+472                         throw new NoSuchElementException("No more CSV records available");
+473                     }
+474                 }
+475 
+476                 return next;
+477             }
+478 
+479             @Override
+480             public void remove() {
+481                 throw new UnsupportedOperationException();
+482             }
+483         };
+484     }
+485 
+486     /**
+487      * Parses the next record from the current point in the stream.
+488      *
+489      * @return the next {@link CSVRecord}, or {@code null} if no values were collected before the token loop ended
+490      * @throws IOException
+491      *             on parse error or input read-failure
+492      */
+493     CSVRecord nextRecord() throws IOException {
+494         CSVRecord result = null;
+495         this.record.clear(); // the value buffer is reused across calls
+496         StringBuilder sb = null; // collects comment text seen while reading this record
+497         final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
+498         do {
+499             this.reusableToken.reset();
+500             this.lexer.nextToken(this.reusableToken);
+501             switch (this.reusableToken.type) {
+502             case TOKEN:
+503                 this.addRecordValue(false);
+504                 break;
+505             case EORECORD:
+506                 this.addRecordValue(true);
+507                 break;
+508             case EOF:
+509                 if (this.reusableToken.isReady) {
+510                     this.addRecordValue(true);
+511                 }
+512                 break;
+513             case INVALID:
+514                 throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence");
+515             case COMMENT: // accumulated (LF-separated) and attached to the record built below
+516                 if (sb == null) { // first comment for this record
+517                     sb = new StringBuilder();
+518                 } else {
+519                     sb.append(Constants.LF);
+520                 }
+521                 sb.append(this.reusableToken.content);
+522                 this.reusableToken.type = TOKEN; // Read another token
+523                 break;
+524             default:
+525                 throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type);
+526             }
+527         } while (this.reusableToken.type == TOKEN); // any type other than TOKEN ends the record
+528 
+529         if (!this.record.isEmpty()) {
+530             this.recordNumber++; // pre-increment: the new record gets the incremented number
+531             final String comment = sb == null ? null : sb.toString();
+532             result = new CSVRecord(this.record.toArray(new String[this.record.size()]), this.headerMap, comment,
+533                     this.recordNumber, startCharPosition);
+534         }
+535         return result;
+536     }
+537 
+538 }
+
+
+ + + \ No newline at end of file