lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chr...@apache.org
Subject svn commit: r1306796 [1/2] - in /lucene/dev/branches/lucene3930/solr: core/src/java/org/apache/solr/handler/ core/src/java/org/apache/solr/internal/ core/src/java/org/apache/solr/internal/csv/ core/src/java/org/apache/solr/internal/csv/writer/ core/src...
Date Thu, 29 Mar 2012 12:01:31 GMT
Author: chrism
Date: Thu Mar 29 12:01:30 2012
New Revision: 1306796

URL: http://svn.apache.org/viewvc?rev=1306796&view=rev
Log:
LUCENE-3930: Merged noggit and commons csv jars into our source code, dropped jars

Added:
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfig.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/noggit/
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/noggit/CharArr.java   (with props)
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/noggit/CharUtil.java   (with props)
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/noggit/JSONParser.java   (with props)
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/noggit/JSONUtil.java   (with props)
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/noggit/JSONWriter.java   (with props)
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/noggit/ObjectBuilder.java
Removed:
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-commons-csv-1.0-SNAPSHOT-r966014.jar
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-commons-csv-LICENSE-ASL.txt
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-commons-csv-NOTICE.txt
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-commons-csv-pom.xml.template
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-noggit-LICENSE-ASL.txt
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-noggit-NOTICE.txt
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-noggit-pom.xml.template
    lucene/dev/branches/lucene3930/solr/lib/apache-solr-noggit-r1211150.jar
Modified:
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/handler/JsonLoader.java
    lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
    lucene/dev/branches/lucene3930/solr/core/src/test/org/apache/solr/TestGroupingSearch.java
    lucene/dev/branches/lucene3930/solr/core/src/test/org/apache/solr/TestJoin.java
    lucene/dev/branches/lucene3930/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
    lucene/dev/branches/lucene3930/solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java
    lucene/dev/branches/lucene3930/solr/core/src/test/org/apache/solr/search/TestRecovery.java

Modified: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/handler/JsonLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/handler/JsonLoader.java?rev=1306796&r1=1306795&r2=1306796&view=diff
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/handler/JsonLoader.java (original)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/handler/JsonLoader.java Thu Mar 29 12:01:30 2012
@@ -24,9 +24,9 @@ import java.util.Map;
 import java.util.Stack;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.noggit.JSONParser;
-import org.apache.noggit.JSONUtil;
-import org.apache.noggit.ObjectBuilder;
+import org.apache.solr.internal.noggit.JSONParser;
+import org.apache.solr.internal.noggit.JSONUtil;
+import org.apache.solr.internal.noggit.ObjectBuilder;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,605 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.internal.csv;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.InputStreamReader;
+import java.io.InputStream;
+import java.util.ArrayList;
+
+
+/**
+ * Parses CSV files according to the specified configuration.
+ *
+ * Because CSV appears in many different dialects, the parser supports many
+ * configuration settings by allowing the specification of a {@link CSVStrategy}.
+ * 
+ * <p>Parsing of a csv-string having tabs as separators,
+ * '"' as an optional value encapsulator, and comments starting with '#':</p>
+ * <pre>
+ *  String[][] data = 
+ *   (new CSVParser(new StringReader("a\tb\nc\td"), new CSVStrategy('\t','"','#'))).getAllValues();
+ * </pre>
+ * 
+ * <p>Parsing of a csv-string in Excel CSV format</p>
+ * <pre>
+ *  String[][] data =
+ *   (new CSVParser(new StringReader("a;b\nc;d"), CSVStrategy.EXCEL_STRATEGY)).getAllValues();
+ * </pre>
+ * 
+ * <p>
+ * Internal parser state is completely covered by the strategy
+ * and the reader-state.</p>
+ * 
+ * <p>see <a href="package-summary.html">package documentation</a> 
+ * for more details</p>
+ */
+public class CSVParser {
+
+  /** Length passed to the CharBuffer that holds a token's content. */
+  private static final int INITIAL_TOKEN_LENGTH = 50;
+  
+  // the token types
+  /** Token has no valid content, i.e. is in its initialized state. */
+  protected static final int TT_INVALID = -1;
+  /** Token with content, at beginning or in the middle of a line. */
+  protected static final int TT_TOKEN = 0;
+  /** Token (which can have content) when end of file is reached. */
+  protected static final int TT_EOF = 1;
+  /** Token (possibly with empty content) when end of a line is reached. */
+  protected static final int TT_EORECORD = 2;
+
+  /** Immutable empty String array. */
+  private static final String[] EMPTY_STRING_ARRAY = new String[0];
+   
+  // the input stream
+  private final ExtendedBufferedReader in;
+  // the CSV dialect settings consulted by the lexer
+  private final CSVStrategy strategy;
+  
+  // the following objects are shared to reduce garbage 
+  /** A record buffer for getLine(). Grows as necessary and is reused. */
+  private final ArrayList record = new ArrayList();
+  private final Token reusableToken = new Token();   // recycled by getLine() for every value
+  private final CharBuffer wsBuf = new CharBuffer();  // leading whitespace skipped by nextToken(Token)
+  private final CharBuffer code = new CharBuffer(4);  // hex digits collected by unicodeEscapeLexer
+
+  
+  /**
+   * Internal token representation.
+   * 
+   * Serves as the contract between the lexer, which fills it, and the parser.
+   */
+  static class Token {
+    /** Token type, one of the TT_xxx constants. */
+    int type = TT_INVALID;
+    /** Buffer holding the token text. */
+    CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH);
+    /** Set by the lexer once the token is complete, i.e. ready for the parser. */
+    boolean isReady;
+    
+    Token reset() {
+        isReady = false;
+        type = TT_INVALID;
+        content.clear();
+        return this;
+    }
+  }
+  
+  // ======================================================
+  //  the constructor
+  // ======================================================
+  
+  /**
+   * CSV parser using the default {@link CSVStrategy}. The bytes are decoded
+   * with the platform default charset (single-argument InputStreamReader).
+   * @param input an InputStream containing "csv-formatted" stream
+   * @deprecated use {@link #CSVParser(Reader)}.
+   */
+  public CSVParser(InputStream input) {
+    this(new InputStreamReader(input));
+  }
+  
+  /**
+   * CSV parser using the default {@link CSVStrategy}.
+   * Delegates with ',' as delimiter; the chained constructors add '"' as encapsulator and disable comments.
+   * @param input a Reader containing "csv-formatted" input
+   */
+  public CSVParser(Reader input) {
+    // note: the ',' passed here must match the delimiter of the default CSV strategy
+    this(input, ',');
+  }
+  
+  /**
+   * Customized value delimiter parser.
+   * 
+   * The parser follows the default {@link CSVStrategy}
+   * except for the delimiter setting: the encapsulator is '"'
+   * and comments are disabled (CSVStrategy.COMMENTS_DISABLED).
+   * @param input a Reader based on "csv-formatted" input
+   * @param delimiter a Char used for value separation
+   * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.
+   */
+  public CSVParser(Reader input, char delimiter) {
+    this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED);
+  }
+  
+  /**
+   * Customized csv parser.
+   * 
+   * The parser parses according to the given CSV dialect settings, which are
+   * wrapped into a new three-argument {@link CSVStrategy}.
+   * Leading whitespaces are truncated, unicode escapes are
+   * not interpreted and empty lines are ignored.
+   * @param input a Reader based on "csv-formatted" input
+   * @param delimiter a Char used for value separation
+   * @param encapsulator a Char used as value encapsulation marker
+   * @param commentStart a Char used for comment identification
+   * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.
+   */
+  public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) {
+    this(input, new CSVStrategy(delimiter, encapsulator, commentStart));
+  }
+
+  /**
+   * Customized CSV parser using the given {@link CSVStrategy}.
+   * The reader is wrapped in an ExtendedBufferedReader; the strategy is stored as is (no null check).
+   * @param input a Reader containing "csv-formatted" input
+   * @param strategy the CSVStrategy used for CSV parsing
+   */
+  public CSVParser(Reader input, CSVStrategy strategy) {
+    this.in = new ExtendedBufferedReader(input);
+    this.strategy = strategy;
+  }
+  
+  // ======================================================
+  //  the parser
+  // ======================================================
+  
+  /**
+   * Reads every remaining record by calling {@link #getLine()} until it
+   * reports end of file, and returns the records as a matrix
+   * (one row per record, one column per value).
+   * <p>
+   * Reading starts at the current parse-position in the stream,
+   * not necessarily at its beginning.
+   * 
+   * @return matrix of records x values ('null' when no record was read)
+   * @throws IOException on parse error or input read-failure
+   */
+  public String[][] getAllValues() throws IOException {
+    ArrayList rows = new ArrayList();
+    for (String[] row = getLine(); row != null; row = getLine()) {
+      rows.add(row);
+    }
+    if (rows.isEmpty()) {
+      // nothing was read: signal end of file with null
+      return null;
+    }
+    String[][] matrix = new String[rows.size()][];
+    rows.toArray(matrix);
+    return matrix;
+  }
+  
+  /**
+   * Parses the CSV according to the given strategy
+   * and returns the next csv-value as string.
+   * 
+   * @return next value in the input stream ('null' when end of file)
+   * @throws IOException on parse error or input read-failure
+   */
+  public String nextValue() throws IOException {
+    Token tkn = nextToken();
+    String ret = null;
+    switch (tkn.type) {
+      case TT_TOKEN:
+      case TT_EORECORD: 
+        ret = tkn.content.toString();
+        break;
+      case TT_EOF:
+        ret = null;
+        break;
+      case TT_INVALID:
+      default:
+        // error no token available (or error)
+        throw new IOException(
+          "(line " + getLineNumber() 
+          + ") invalid parse sequence");
+        // unreachable: break;
+    }
+    return ret;
+  }
+  
+  /**
+   * Reads one record: collects values from the current position in the
+   * stream up to and including the value that ends the current line.
+   * 
+   * @return array with the values of the record
+   *        ('null' when end of file has been reached)
+   * @throws IOException on parse error or input read-failure
+   */
+  public String[] getLine() throws IOException {
+    // preset result; replaced below when values were collected or EOF was hit
+    String[] ret = EMPTY_STRING_ARRAY;
+    record.clear();
+
+    int type;
+    do {
+      // the shared token is recycled for every value of the record
+      reusableToken.reset();
+      nextToken(reusableToken);
+      type = reusableToken.type;
+      if (type == TT_TOKEN || type == TT_EORECORD) {
+        // value inside the line, or the value that closes the line
+        record.add(reusableToken.content.toString());
+      } else if (type == TT_EOF) {
+        if (reusableToken.isReady) {
+          // the input ended right after a last value
+          record.add(reusableToken.content.toString());
+        } else {
+          // nothing left to read
+          ret = null;
+        }
+      } else {
+        // TT_INVALID or unknown type
+        throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
+      }
+      // only a TT_TOKEN announces further values on the same line
+    } while (type == TT_TOKEN);
+
+    // an empty record keeps the preset result (empty array or null)
+    if (!record.isEmpty()) {
+      ret = (String[]) record.toArray(new String[record.size()]);
+    }
+    return ret;
+  }
+  
+  /**
+   * Returns the current line number as reported by the underlying reader.
+   * 
+   * ATTENTION: if the csv contains multiline values, the returned
+   *            number does not correspond to the record number.
+   * 
+   * @return  current line number
+   */
+  public int getLineNumber() {
+    return in.getLineNumber();  
+  }
+  
+  // ======================================================
+  //  the lexer(s)
+  // ======================================================
+ 
+  /**
+   * Convenience method for <code>nextToken(new Token())</code>: lexes into a fresh Token.
+   */
+  protected Token nextToken() throws IOException {
+      return nextToken(new Token());
+  }
+  
+  /**
+   * Returns the next token.
+   * 
+   * A token corresponds to a term, a record change or an
+   * end-of-file indicator.
+   * 
+   * @param tkn an existing Token object to reuse. The caller is responsible for
+   * initializing (resetting) the Token before passing it in.
+   * @return the next token found
+   * @throws IOException on stream access error
+   */
+  protected Token nextToken(Token tkn) throws IOException {
+    wsBuf.clear(); // reuse
+    
+    // get the last read char (required for empty line detection)
+    int lastChar = in.readAgain();
+    
+    //  read the next char and set eol
+    /* note: unfortunately isEndOfLine may silently consume a character.
+     *       That has no effect outside of the method, so a simple workaround
+     *       is to call 'readAgain' on the stream...
+     *       uh: might be using objects instead of base-types (jdk1.5 autoboxing!)
+     */
+    int c = in.read();
+    boolean eol = isEndOfLine(c);
+    c = in.readAgain();
+     
+    //  empty line detection: eol AND (last char was EOL or beginning)
+    while (strategy.getIgnoreEmptyLines() && eol 
+      && (lastChar == '\n' 
+      || lastChar == ExtendedBufferedReader.UNDEFINED) 
+      && !isEndOfFile(lastChar)) {
+      // go one char ahead ...
+      lastChar = c;
+      c = in.read();
+      eol = isEndOfLine(c);
+      c = in.readAgain();
+      // reached end of file without any content (empty line at the end)
+      if (isEndOfFile(c)) {
+        tkn.type = TT_EOF;
+        return tkn;
+      }
+    }
+
+    // did we reach eof during the last iteration already ? TT_EOF (not marked ready)
+    if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
+      tkn.type = TT_EOF;
+      return tkn;
+    } 
+    
+    //  important: make sure a new char gets consumed in each iteration
+    while (!tkn.isReady && tkn.type != TT_EOF) {
+      // skip whitespace at the beginning of a token, remembering it in wsBuf
+      while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
+        wsBuf.append((char) c);
+        c = in.read();
+        eol = isEndOfLine(c);
+      }
+      // ok, start of token reached: comment, encapsulated, or token
+      if (c == strategy.getCommentStart()) {
+        // ignore everything till end of line, then lex the following token into the reset tkn
+        in.readLine();
+        tkn = nextToken(tkn.reset());
+      } else if (c == strategy.getDelimiter()) {
+        // empty token return TT_TOKEN("")
+        tkn.type = TT_TOKEN;
+        tkn.isReady = true;
+      } else if (eol) {
+        // empty token return TT_EORECORD("")
+        //noop: tkn.content.append("");
+        tkn.type = TT_EORECORD;
+        tkn.isReady = true;
+      } else if (c == strategy.getEncapsulator()) {
+        // consume encapsulated token
+        encapsulatedTokenLexer(tkn, c);
+      } else if (isEndOfFile(c)) {
+        // end of file: return a ready TT_EOF with empty content
+        //noop: tkn.content.append("");
+        tkn.type = TT_EOF;
+        tkn.isReady = true;
+      } else {
+        // next token must be a simple token
+        // NOTE(review): wsBuf is only filled by the skip loop above, which runs only when leading whitespace IS ignored, so this append looks like a no-op - confirm
+        if (!strategy.getIgnoreLeadingWhitespaces()) {
+          tkn.content.append(wsBuf);
+        }
+        simpleTokenLexer(tkn, c);
+      }
+    }
+    return tkn;  
+  }
+  
+  /**
+   * A simple token lexer
+   * 
+   * Simple tokens are tokens which are not surrounded by encapsulators.
+   * A simple token might contain escaped delimiters (as \, or \;). The
+   * token is finished when one of the following conditions becomes true:
+   * <ul>
+   *   <li>end of line has been reached (TT_EORECORD)</li>
+   *   <li>end of stream has been reached (TT_EOF)</li>
+   *   <li>an unescaped delimiter has been reached (TT_TOKEN)</li>
+   * </ul>
+   *  
+   * @param tkn  the current token
+   * @param c    the current character
+   * @return the filled token, marked as ready
+   * 
+   * @throws IOException on stream access error
+   */
+  private Token simpleTokenLexer(Token tkn, int c) throws IOException {
+    for (;;) {
+      if (isEndOfLine(c)) {
+        // end of record
+        tkn.type = TT_EORECORD;
+        tkn.isReady = true;
+        break;
+      } else if (isEndOfFile(c)) {
+        // end of file
+        tkn.type = TT_EOF;
+        tkn.isReady = true;
+        break;
+      } else if (c == strategy.getDelimiter()) {
+        // end of token
+        tkn.type = TT_TOKEN;
+        tkn.isReady = true;
+        break;
+      } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
+        // interpret unicode escaped chars (like \u0070 -> p)
+        tkn.content.append((char) unicodeEscapeLexer(c));
+      } else if (c == strategy.getEscape()) {
+        tkn.content.append((char)readEscape(c));
+      } else {
+        tkn.content.append((char) c);
+      }
+      // advance to the next character of the token
+      c = in.read();
+    }
+    // the terminating char itself is never appended to the content
+    if (strategy.getIgnoreTrailingWhitespaces()) {
+      tkn.content.trimTrailingWhitespace();
+    }
+
+    return tkn;
+  }
+  
+  
+  /**
+   * An encapsulated token lexer
+   * 
+   * Encapsulated tokens are surrounded by the given encapsulating-string.
+   * The encapsulator itself might be included in the token using a
+   * doubling syntax (as "", '') or using escaping (as in \", \').
+   * Whitespace between the closing encapsulator and the next delimiter is ignored.
+   * 
+   * @param tkn    the current token
+   * @param c      the current character (the opening encapsulator)
+   * @return a valid token object
+   * @throws IOException on eof inside the token or on a non-whitespace char after the closing encapsulator
+   */
+  private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
+    // save current line
+    int startLineNumber = getLineNumber();
+    // ignore the given opening encapsulator: the loop starts by reading the next char
+    // assert c == encapsulator;
+    for (;;) {
+      c = in.read();
+
+      if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead()=='u') {
+        tkn.content.append((char) unicodeEscapeLexer(c));
+      } else if (c == strategy.getEscape()) {
+        tkn.content.append((char)readEscape(c));
+      } else if (c == strategy.getEncapsulator()) {
+        if (in.lookAhead() == strategy.getEncapsulator()) {
+          // doubled encapsulator -> add single encapsulator to token
+          c = in.read();
+          tkn.content.append((char) c);
+        } else {
+          // token finish mark (encapsulator) reached: ignore whitespace till delimiter
+          for (;;) {
+            c = in.read();
+            if (c == strategy.getDelimiter()) {
+              tkn.type = TT_TOKEN;
+              tkn.isReady = true;
+              return tkn;
+            } else if (isEndOfFile(c)) {
+              tkn.type = TT_EOF;
+              tkn.isReady = true;
+              return tkn;
+            } else if (isEndOfLine(c)) {
+              // ok, end of token reached
+              tkn.type = TT_EORECORD;
+              tkn.isReady = true;
+              return tkn;
+            } else if (!isWhitespace(c)) {
+              // error: invalid char between token and next delimiter
+              throw new IOException(
+                      "(line " + getLineNumber()
+                              + ") invalid char between encapsulated token end delimiter"
+              );
+            }
+          }
+        }
+      } else if (isEndOfFile(c)) {
+        // error condition (end of file before end of token)
+        throw new IOException(
+                "(startline " + startLineNumber + ")"
+                        + "eof reached before encapsulated token finished"
+        );
+      } else {
+        // consume character
+        tkn.content.append((char) c);
+      }
+    }
+  }
+  
+  
+  /**
+   * Decodes Unicode escapes.
+   * 
+   * Interpretation of "\\uXXXX" escape sequences
+   * where XXXX is a hex-number.
+   * @param c current char which is discarded because it's the "\\" of "\\uXXXX"
+   * @return the decoded character
+   * @throws IOException on wrong unicode escape sequence (the NumberFormatException is kept as cause) or read error
+   */
+  protected int unicodeEscapeLexer(int c) throws IOException {
+    int ret = 0;
+    // c is the backslash; this read consumes the 'u', the loop then reads 4 hex digits
+    c = in.read();
+    code.clear();
+    try {
+      for (int i = 0; i < 4; i++) {
+        c  = in.read();
+        if (isEndOfFile(c) || isEndOfLine(c)) {
+          throw new NumberFormatException("number too short");
+        }
+        code.append((char) c);
+      }
+      ret = Integer.parseInt(code.toString(), 16);
+    } catch (NumberFormatException e) {
+      throw new IOException(
+        "(line " + getLineNumber() + ") Wrong unicode escape sequence found '" 
+        + code.toString() + "'" + e.toString(), e); // keep the original exception as cause
+    }
+    return ret;
+  }
+
+  private int readEscape(int c) throws IOException {
+    // assume c is the escape char (normally a backslash)
+    c = in.read();
+    int out;
+    switch (c) {
+      case 'r': out='\r'; break;
+      case 'n': out='\n'; break;
+      case 't': out='\t'; break;
+      case 'b': out='\b'; break;
+      case 'f': out='\f'; break;
+      default : out=c;
+    }
+    return out;
+  }
+  
+  // ======================================================
+  //  strategies
+  // ======================================================
+  
+  /**
+   * Returns the CSV strategy this parser was constructed with.
+   * The same instance is used by the lexer, so it should not be modified.
+   * @return strategy currently being used
+   */
+  public CSVStrategy getStrategy() {
+    return this.strategy;
+  }
+  
+  // ======================================================
+  //  Character class checker
+  // ======================================================
+  
+  /**
+   * @return true if the given char is a whitespace character other than the strategy's delimiter
+   */
+  private boolean isWhitespace(int c) {
+    return Character.isWhitespace((char) c) && (c != strategy.getDelimiter());
+  }
+  
+  /**
+   * Tells whether the given char ends a line; accepts \n and \r\n, a lone \r does not count.
+   * Greedy: on \r\n the \n is silently consumed from the stream.
+   * 
+   * @return true if the given character is a line-terminator
+   */
+  private boolean isEndOfLine(int c) throws IOException {
+    // only a \r directly followed by \n is treated as (part of) a terminator
+    boolean crBeforeLf = (c == '\r') && (in.lookAhead() == '\n');
+    if (crBeforeLf) {
+      // swallow the \n; the caller's copy of c is not affected
+      c = in.read();
+    }
+    // at this point c is \n for both accepted forms
+    return c == '\n';
+  }
+  
+  /**
+   * @return true if the given character is the reader's END_OF_STREAM marker
+   */
+  private boolean isEndOfFile(int c) {
+    return c == ExtendedBufferedReader.END_OF_STREAM;
+  }
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,307 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.internal.csv;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.io.Writer;
+
+/**
+ * Print values as a comma separated list.
+ */
+public class CSVPrinter {
+
+  /** The place that the values get written. */
+  protected final Writer out;
+  protected final CSVStrategy strategy;  // never null: the constructor substitutes the default strategy
+
+  /** True if we just began a new line, i.e. the next value needs no leading delimiter. */
+  protected boolean newLine = true;
+
+  protected char[] buf = new char[0];  // temporary buffer
+
+  /**
+   * Creates a printer that writes values to the given writer in the
+   * CSV variation described by the strategy.
+   *
+   * Only a pure encapsulation strategy or a pure escaping strategy is supported;
+   * hybrid strategies (encapsulation and escaping with a different character) are not.
+   *
+   * @param out writer to which to print.
+   * @param strategy describes the CSV variation; null selects CSVStrategy.DEFAULT_STRATEGY.
+   */
+  public CSVPrinter(Writer out, CSVStrategy strategy) {
+    this.strategy = (strategy != null) ? strategy : CSVStrategy.DEFAULT_STRATEGY;
+    this.out = out;
+  }
+  
+  // ======================================================
+  //  printing implementation
+  // ======================================================
+
+  /**
+   * Ends the current line: writes the strategy's printer newline and marks the start of a new line.
+   */
+  public void println() throws IOException {
+    out.write(strategy.getPrinterNewline());
+    newLine = true;
+  }
+
+  /** Flushes the underlying writer. */
+  public void flush() throws IOException {
+    out.flush();
+  }
+
+  /**
+   * Print a single line of comma separated values.
+   * Each value is passed to print(String) in array order,
+   * then the line is terminated with println().
+   *
+   * @param values values to be output; must not be null.
+   */
+  public void println(String[] values) throws IOException {
+    for (int i = 0; i < values.length; i++) {
+      print(values[i]);
+    }
+    println();
+  }
+
+
+  /**
+   * Put a comment among the comma separated values.
+   * Comments will always begin on a new line and occupy at
+   * least one full line. The character specified to start
+   * comments and a space will be inserted at the beginning of
+   * each new line in the comment. Nothing is written when the strategy has commenting disabled.
+   *
+   * @param comment the comment to output
+   */
+  public void printlnComment(String comment) throws IOException {
+    if (strategy.isCommentingDisabled()) {
+      return;
+    }
+    if (!newLine) {
+      // a comment never shares a line with values
+      println();
+    }
+    out.write(strategy.getCommentStart());
+    out.write(' ');
+    final int length = comment.length();
+    for (int i = 0; i < length; i++) {
+      final char c = comment.charAt(i);
+      if (c != '\r' && c != '\n') {
+        // ordinary character: copy it
+        out.write(c);
+        continue;
+      }
+      // line break inside the comment; \r\n counts as a single break
+      if (c == '\r' && i + 1 < length && comment.charAt(i + 1) == '\n') {
+        i++;
+      }
+      // continue the comment on a fresh line, again introduced by the marker
+      println();
+      out.write(strategy.getCommentStart());
+      out.write(' ');
+    }
+    println();
+  }
+
+  /** Prints len chars of value starting at offset as one value; with checkForEscape it is encapsulated or escaped per the strategy. */
+  public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
+    if (checkForEscape) {
+      // encapsulation takes precedence over escaping
+      if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) {
+        printAndEncapsulate(value, offset, len);
+        return;
+      }
+      if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) {
+        printAndEscape(value, offset, len);
+        return;
+      }
+    }
+
+    // raw output: the caller opted out, or the strategy has neither mechanism
+    printSep();
+    out.write(value, offset, len);
+  }
+
+  void printSep() throws IOException {
+    // every value except the first one on a line is preceded by the delimiter
+    if (!newLine) {
+      out.write(this.strategy.getDelimiter());
+    }
+    newLine = false;
+  }
+
+  void printAndEscape(char[] value, int offset, int len) throws IOException {
+    int start = offset;
+    int pos = offset;
+    int end = offset + len;
+
+    printSep();
+
+    char delim = this.strategy.getDelimiter();
+    char escape = this.strategy.getEscape();
+
+    while (pos < end) {
+      char c = value[pos];
+      if (c == '\r' || c=='\n' || c==delim || c==escape) {
+        // write out segment up until this char
+        int l = pos-start;
+        if (l>0) {
+          out.write(value, start, l);
+        }
+        if (c=='\n') c='n';
+        else if (c=='\r') c='r';
+
+        out.write(escape);
+        out.write(c);
+
+        start = pos+1; // start on the current char after this one
+      }
+
+      pos++;
+    }
+
+    // write last segment
+    int l = pos-start;
+    if (l>0) {
+      out.write(value, start, l);      
+    }
+  }
+
+  /**
+   * Prints the separator (if needed) followed by the given characters, wrapping
+   * them in the strategy's encapsulator when one of the rules below asks for it.
+   * Inside an encapsulated value every encapsulator character is written twice.
+   *
+   * <p>A value is encapsulated when
+   * <ul>
+   *   <li>it is empty and the first value on the line,</li>
+   *   <li>it is the first value on the line and does not start with an ASCII
+   *       letter or digit,</li>
+   *   <li>its first character is {@code '#'} or anything below it,</li>
+   *   <li>it contains a line feed, carriage return, encapsulator or delimiter, or</li>
+   *   <li>its last character is a space or anything below it.</li>
+   * </ul>
+   *
+   * @param value buffer holding the characters to print
+   * @param offset index of the first character to print
+   * @param len number of characters to print
+   */
+  void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
+    // must be captured before printSep(), which clears the newLine flag
+    boolean first = newLine;  // is this the first value on this line?
+    boolean quote = false;
+    int start = offset;
+    int pos = offset;
+    int end = offset + len;
+
+    printSep();    
+
+    char delim = this.strategy.getDelimiter();
+    char encapsulator = this.strategy.getEncapsulator();
+
+    if (len <= 0) {
+      // always quote an empty token that is the first
+      // on the line, as it may be the only thing on the
+      // line. If it were not quoted in that case,
+      // an empty line has no tokens.
+      if (first) {
+        quote = true;
+      }
+    } else {
+      char c = value[pos];
+
+      // Hmmm, where did this rule come from?
+      // (first value on a line: quote unless it starts with 0-9, A-Z or a-z)
+      if (first
+          && (c < '0'
+          || (c > '9' && c < 'A')
+          || (c > 'Z' && c < 'a')
+          || (c > 'z'))) {
+        quote = true;
+      // } else if (c == ' ' || c == '\f' || c == '\t') {
+      } else if (c <= '#') {
+        // Some other chars at the start of a value caused the parser to fail, so for now
+        // encapsulate if we start in anything less than '#'.  We are being conservative
+        // by including the default comment char too.
+        quote = true;
+      } else {
+        // scan for a character that forces encapsulation; on a hit pos stays on
+        // that character so the writing loop below can resume from there
+        while (pos < end) {
+          c = value[pos];
+          if (c=='\n' || c=='\r' || c==encapsulator || c==delim) {
+            quote = true;
+            break;
+          }
+          pos++;
+        }
+
+        if (!quote) {
+          // nothing found inside the value: only the last character is left to check
+          pos = end-1;
+          c = value[pos];
+          // if (c == ' ' || c == '\f' || c == '\t') {
+          // Some other chars at the end caused the parser to fail, so for now
+          // encapsulate if we end in anything less than ' '
+          if (c <= ' ') {
+            quote = true;
+          }
+        }
+      }
+    }
+
+    if (!quote) {
+      // no encapsulation needed - write out the original value
+      out.write(value, offset, len);
+      return;
+    }
+
+    // we hit something that needed encapsulation
+    out.write(encapsulator);
+
+    // Pick up where we left off: pos should be positioned on the first character that caused
+    // the need for encapsulation.
+    // (When one of the first-character rules fired, pos is still at offset, so the
+    // whole value is scanned here. Everything before pos is known to contain no
+    // encapsulator and is written as part of the first chunk, because start is
+    // still at offset.)
+    while (pos<end) {
+      char c = value[pos];
+      if (c==encapsulator) {
+        // write out the chunk up until this point
+
+        // add 1 to the length to write out the encapsulator also
+        out.write(value, start, pos-start+1);
+        // put the next starting position on the encapsulator so we will
+        // write it out again with the next string (effectively doubling it)
+        start = pos;
+      }
+      pos++;
+    }
+
+    // write the last segment
+    out.write(value, start, pos-start);
+    out.write(encapsulator);    
+  }
+
+  /**
+   * Prints a string as the next value on the current line.
+   *
+   * <p>With {@code checkForEscape == false} the string is written verbatim
+   * after the separator. Otherwise its characters are copied into the reusable
+   * {@code buf} array (replaced by a larger one when it is too small) and
+   * passed on to {@link #print(char[], int, int, boolean)}, which escapes or
+   * encapsulates them as needed.
+   *
+   * @param value value to be outputted.
+   * @param checkForEscape whether the value should be escaped or encapsulated as needed
+   */
+  public void print(String value, boolean checkForEscape) throws IOException {
+    if (checkForEscape) {
+      final int n = value.length();
+      if (n > buf.length) {
+        buf = new char[n];
+      }
+      value.getChars(0, n, buf, 0);
+      print(buf, 0, n, checkForEscape);
+      return;
+    }
+
+    // no inspection wanted: write directly from the string
+    printSep();
+    out.write(value);
+  }
+
+  /**
+   * Prints the string as the next value on the line, escaping or
+   * encapsulating it as needed. Shorthand for {@code print(value, true)}.
+   *
+   * @param value value to be outputted.
+   */
+  public void print(String value) throws IOException {
+    final boolean checkForEscape = true;
+    print(value, checkForEscape);
+  }
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.internal.csv;
+
+import java.io.Serializable;
+
+/**
+ * CSVStrategy
+ * 
+ * Represents the strategy for a CSV: the delimiter, encapsulator, comment and
+ * escape characters, the whitespace / empty-line handling used when parsing,
+ * and the line terminator used when printing.
+ *
+ * <p>Instances are mutable. The predefined strategies below are shared, so
+ * {@link #clone()} one of them before changing any of its settings.
+ */
+public class CSVStrategy implements Cloneable, Serializable {
+
+    private char delimiter;
+    private char encapsulator;
+    private char commentStart;
+    private char escape;
+    private boolean ignoreLeadingWhitespaces;
+    private boolean ignoreTrailingWhitespaces;
+    private boolean interpretUnicodeEscapes;
+    private boolean ignoreEmptyLines;
+
+    // controls for output
+    private String printerNewline = "\n";
+
+    // (char)-2 is U+FFFE. It is used to signal "disabled" because it cannot be
+    // confused with an EOF signal (-1) and because U+FFFE is a Unicode
+    // noncharacter, so there should never be a collision with a real text char.
+
+    /** Comment-start value meaning that comments are not recognised. */
+    public static final char COMMENTS_DISABLED       = (char)-2;
+    /** Escape value meaning that no escape character is used. */
+    public static final char ESCAPE_DISABLED         = (char)-2;
+    /** Encapsulator value meaning that values are never encapsulated. */
+    public static final char ENCAPSULATOR_DISABLED   = (char)-2;
+
+    /** Comma separated, double-quote encapsulated; trims whitespace and skips empty lines. */
+    public static final CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, 
+                                                                       true, false, true);
+    /** Comma separated, double-quote encapsulated; keeps whitespace and empty lines. */
+    public static final CSVStrategy EXCEL_STRATEGY   = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, false, 
+                                                                       false, false, false);
+    /** Tab separated, double-quote encapsulated; trims whitespace and skips empty lines. */
+    public static final CSVStrategy TDF_STRATEGY     = new CSVStrategy('\t', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, 
+                                                                       true, false, true);
+
+
+    /**
+     * Creates a strategy with the given characters, escaping disabled, leading
+     * and trailing whitespace ignored, unicode escapes not interpreted and
+     * empty lines ignored.
+     *
+     * @param delimiter a Char used for value separation
+     * @param encapsulator a Char used as value encapsulation marker
+     * @param commentStart a Char used for comment identification
+     */
+    public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
+        this(delimiter, encapsulator, commentStart, ESCAPE_DISABLED, true, true, false, true);
+    }
+  
+    /**
+     * Customized CSV strategy setter.
+     * 
+     * @param delimiter a Char used for value separation
+     * @param encapsulator a Char used as value encapsulation marker
+     * @param commentStart a Char used for comment identification
+     * @param escape a Char used to escape special characters, or
+     *               {@link #ESCAPE_DISABLED}
+     * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
+     *                                ignored
+     * @param ignoreTrailingWhitespace TRUE when trailing whitespaces should be
+     *                                 ignored
+     * @param interpretUnicodeEscapes TRUE when unicode escapes should be 
+     *                                interpreted
+     * @param ignoreEmptyLines TRUE when the parser should skip empty lines
+     */
+    public CSVStrategy(
+        char delimiter, 
+        char encapsulator, 
+        char commentStart,
+        char escape,
+        boolean ignoreLeadingWhitespace, 
+        boolean ignoreTrailingWhitespace, 
+        boolean interpretUnicodeEscapes,
+        boolean ignoreEmptyLines) 
+    {
+        // fields are assigned directly: calling the overridable setters from a
+        // constructor would run subclass code on a half-built instance
+        this.delimiter = delimiter;
+        this.encapsulator = encapsulator;
+        this.commentStart = commentStart;
+        this.escape = escape;
+        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespace;
+        this.ignoreTrailingWhitespaces = ignoreTrailingWhitespace;
+        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
+        this.ignoreEmptyLines = ignoreEmptyLines;
+    }
+
+    /**
+     * Creates a strategy with escaping disabled and trailing whitespace ignored.
+     *
+     * @deprecated use the constructor that also takes the escape character and
+     *             the trailing-whitespace flag
+     */
+    @Deprecated
+    public CSVStrategy(
+        char delimiter,
+        char encapsulator,
+        char commentStart,
+        boolean ignoreLeadingWhitespace,
+        boolean interpretUnicodeEscapes,
+        boolean ignoreEmptyLines)
+    {
+        this(delimiter, encapsulator, commentStart, CSVStrategy.ESCAPE_DISABLED, ignoreLeadingWhitespace, 
+             true, interpretUnicodeEscapes, ignoreEmptyLines);
+    }
+
+    public void setDelimiter(char delimiter) { this.delimiter = delimiter; }
+    public char getDelimiter() { return this.delimiter; }
+
+    public void setEncapsulator(char encapsulator) { this.encapsulator = encapsulator; }
+    public char getEncapsulator() { return this.encapsulator; }
+
+    public void setCommentStart(char commentStart) { this.commentStart = commentStart; }
+    public char getCommentStart() { return this.commentStart; }
+    /** @return true when the comment start character is {@link #COMMENTS_DISABLED} */
+    public boolean isCommentingDisabled() { return this.commentStart == COMMENTS_DISABLED; }
+
+    public void setEscape(char escape) { this.escape = escape; }
+    public char getEscape() { return this.escape; }
+
+    public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) { 
+        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; 
+    }
+    public boolean getIgnoreLeadingWhitespaces() { return this.ignoreLeadingWhitespaces; }
+
+    public void setIgnoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) { 
+        this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces; 
+    }
+    public boolean getIgnoreTrailingWhitespaces() { return this.ignoreTrailingWhitespaces; }
+
+    public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) { 
+        this.interpretUnicodeEscapes = interpretUnicodeEscapes; 
+    }
+    public boolean getUnicodeEscapeInterpretation() { return this.interpretUnicodeEscapes; }
+
+    public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; }
+    public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; }
+
+    /** Sets the line terminator written by the printer (initially "\n"). */
+    public void setPrinterNewline(String newline) {
+        this.printerNewline = newline;
+    }
+    public String getPrinterNewline() {
+        return this.printerNewline;
+    }
+
+    /**
+     * Returns a copy of this strategy that can be modified without affecting
+     * the original.
+     */
+    @Override
+    public Object clone() {
+        try {
+            return super.clone();
+        } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);  // impossible
+        }
+    }
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.internal.csv;
+
+import java.io.StringWriter;
+import java.io.StringReader;
+import java.io.IOException;
+
+/**
+ * Utility methods for dealing with CSV files
+ */
+public class CSVUtils {
+
+    private static final String[] EMPTY_STRING_ARRAY = new String[0];
+    private static final String[][] EMPTY_DOUBLE_STRING_ARRAY = new String[0][0];
+
+    /**
+     * <p><code>CSVUtils</code> instances should NOT be constructed in
+     * standard programming. 
+     *
+     * <p>This constructor is public to permit tools that require a JavaBean
+     * instance to operate.</p>
+     */
+    public CSVUtils() {
+    }
+  
+    /**
+     * Converts an array of string values into a single CSV line. All
+     * <code>null</code> values are converted to the string <code>"null"</code>,
+     * all strings equal to <code>"null"</code> will additionally get quotes
+     * around.
+     *
+     * @param values the value array
+     * @return the CSV string, will be an empty string if the length of the
+     * value array is 0
+     */
+    public static String printLine(String[] values, CSVStrategy strategy) {
+        // set up a CSVUtils
+        StringWriter stringWriter = new StringWriter();
+        CSVPrinter csvPrinter = new CSVPrinter(stringWriter, strategy);
+  
+        // check for null values an "null" as strings and convert them
+        // into the strings "null" and "\"null\""
+        for (int i = 0; i < values.length; i++) {
+            if (values[i] == null) {
+                values[i] = "null";
+            } else if (values[i].equals("null")) {
+                values[i] = "\"null\"";
+            }
+        }
+  
+        // convert to CSV
+        try {
+          csvPrinter.println(values);
+        } catch (IOException e) {
+          // should not happen with StringWriter
+        }
+        // as the resulting string has \r\n at the end, we will trim that away
+        return stringWriter.toString().trim();
+    }
+  
+  // ======================================================
+  //  static parsers
+  // ======================================================
+  
+  /**
+   * Parses the given String according to the default {@link CSVStrategy}.
+   * 
+   * @param s CSV String to be parsed.
+   * @return parsed String matrix (which is never null)
+   * @throws IOException in case of error
+   */
+  public static String[][] parse(String s) throws IOException {
+    if (s == null) {
+      throw new IllegalArgumentException("Null argument not allowed.");
+    }
+    String[][] result = (new CSVParser(new StringReader(s))).getAllValues();
+    if (result == null) {
+      // since CSVStrategy ignores empty lines an empty array is returned
+      // (i.e. not "result = new String[][] {{""}};")
+      result = EMPTY_DOUBLE_STRING_ARRAY;
+    }
+    return result;
+  }
+  
+  /**
+   * Parses the first line only according to the default {@link CSVStrategy}.
+   * 
+   * Parsing empty string will be handled as valid records containing zero
+   * elements, so the following property holds: parseLine("").length == 0.
+   * 
+   * @param s CSV String to be parsed.
+   * @return parsed String vector (which is never null)
+   * @throws IOException in case of error
+   */
+  public static String[] parseLine(String s) throws IOException {
+    if (s == null) {
+      throw new IllegalArgumentException("Null argument not allowed.");
+    }
+    // uh,jh: make sure that parseLine("").length == 0
+    if (s.length() == 0) {
+      return EMPTY_STRING_ARRAY;
+    }
+    return (new CSVParser(new StringReader(s))).getLine();
+  }
+  
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.solr.internal.csv;
+
+/**
+ * A simple StringBuffer replacement that aims to 
+ * reduce copying as much as possible. The buffer
+ * grows as necessary.
+ * This class is not thread safe.
+ * 
+ * @author Ortwin Gl&uuml;ck
+ */
+public class CharBuffer {
+
+    private char[] c;
+
+    /**
+     * Actually used number of characters in the array. 
+     * It is also the index at which
+     * a new character will be inserted into <code>c</code>. 
+     */ 
+    private int length;
+    
+    /**
+     * Creates a new CharBuffer with an initial capacity of 32 characters.
+     */
+    public CharBuffer() {
+        this(32);
+    }
+    
+    /**
+     * Creates a new CharBuffer with an initial capacity 
+     * of <code>length</code> characters.
+     *
+     * @param length the initial capacity, must be greater than zero
+     * @throws IllegalArgumentException if <code>length</code> is zero or negative
+     */
+    public CharBuffer(final int length) {
+        if (length < 0) {
+            throw new IllegalArgumentException("Negative CharBuffer capacity: " + length);
+        }
+        if (length == 0) {
+            throw new IllegalArgumentException("Can't create an empty CharBuffer");
+        }
+        this.c = new char[length];
+    }
+    
+    /**
+     * Empties the buffer. The capacity still remains the same, so no memory is freed.
+     */
+    public void clear() {
+        length = 0;
+    }
+    
+    /**
+     * Returns the number of characters in the buffer.
+     * @return the number of characters
+     */
+    public int length() {
+        return length;
+    }
+
+    /**
+     * Returns the current capacity of the buffer.
+     * @return the maximum number of characters that can be stored in this buffer without
+     * resizing it.
+     */
+    public int capacity() {
+        return c.length;
+    }
+
+    
+    /**
+     * Appends the contents of <code>cb</code> to the end of this CharBuffer.
+     * @param cb the CharBuffer to append or null
+     */
+    public void append(final CharBuffer cb) {
+        if (cb == null) {
+            return;
+        }
+        provideCapacity(length + cb.length);
+        System.arraycopy(cb.c, 0, c, length, cb.length);
+        length += cb.length;
+    }
+    
+    /**
+     * Appends <code>s</code> to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param s the String to append or null
+     */
+    public void append(final String s) {
+        if (s == null) {
+            return;
+        }
+        // copy straight out of the String; going through toCharArray()
+        // would copy the characters a second time
+        final int n = s.length();
+        provideCapacity(length + n);
+        s.getChars(0, n, c, length);
+        length += n;
+    }
+    
+    /**
+     * Appends <code>sb</code> to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param sb the StringBuffer to append or null
+     */
+    public void append(final StringBuffer sb) {
+        if (sb == null) {
+            return;
+        }
+        provideCapacity(length + sb.length());
+        sb.getChars(0, sb.length(), c, length);
+        length += sb.length();
+    }
+    
+    /**
+     * Appends <code>data</code> to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param data the char[] to append or null
+     */
+    public void append(final char[] data) {
+        if (data == null) {
+            return;
+        }
+        provideCapacity(length + data.length);
+        System.arraycopy(data, 0, c, length, data.length);
+        length += data.length;
+    }
+    
+    /**
+     * Appends a single character to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param data the char to append
+     */
+    public void append(final char data) {
+        provideCapacity(length + 1);
+        c[length] = data;
+        length++;
+    }
+    
+    /**
+     * Shrinks the capacity of the buffer to the current length if necessary.
+     * This method involves copying the data once!
+     */
+    public void shrink() {
+        if (c.length == length) {
+            return;
+        }
+        char[] newc = new char[length];
+        System.arraycopy(c, 0, newc, 0, length);
+        c = newc;
+    }
+
+    /**
+     * Removes trailing whitespace.
+     */
+    public void trimTrailingWhitespace() {
+        while (length > 0 && Character.isWhitespace(c[length - 1])) {
+            length--;
+        }
+    }
+
+    /**
+     * Returns the contents of the buffer as a char[]. The returned array may
+     * be the internal array of the buffer, so the caller must take care when
+     * modifying it.
+     * This method allows to avoid copying if the caller knows the exact capacity
+     * before.
+     * @return the internal array when the buffer is filled to capacity,
+     *         otherwise a copy holding exactly the used characters
+     */
+    public char[] getCharacters() {
+        if (c.length == length) {
+            return c;
+        }
+        char[] chars = new char[length];
+        System.arraycopy(c, 0, chars, 0, length);
+        return chars;
+    }
+
+    /**
+     * Returns the character at the specified position. The position is only
+     * checked against the capacity, not against {@link #length()}.
+     *
+     * @param pos index into the internal array
+     * @return the character stored at <code>pos</code>
+     */
+    public char charAt(int pos) {
+        return c[pos];
+    }
+
+    /**
+     * Converts the contents of the buffer into a StringBuffer.
+     * This method involves copying the new data once!
+     * @return a new StringBuffer holding the used characters
+     */
+    public StringBuffer toStringBuffer() {
+        StringBuffer sb = new StringBuffer(length);
+        sb.append(c, 0, length);
+        return sb;
+    }
+    
+    /**
+     * Converts the contents of the buffer into a String.
+     * This method involves copying the new data once!
+     * @return a String holding the used characters
+     */
+    public String toString() {
+        return new String(c, 0, length);
+    }
+    
+    /**
+     * Copies the data into a new array of at least <code>capacity</code> size.
+     * Nothing happens when the current array is already large enough;
+     * otherwise the new array is about one and a half times the requested size.
+     * @param capacity the number of characters the buffer must be able to hold
+     */
+    public void provideCapacity(final int capacity) {
+        if (c.length >= capacity) {
+            return;
+        }
+        // same value as ((capacity*3)>>1) + 1, but without overflowing in the
+        // multiplication for large requests
+        int newcapacity = capacity + (capacity >> 1) + 1;
+        if (newcapacity < capacity) {
+            // the addition overflowed: fall back to exactly what was asked for
+            newcapacity = capacity;
+        }
+        char[] newc = new char[newcapacity];
+        System.arraycopy(c, 0, newc, 0, length);
+        c = newc;
+    }
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.internal.csv;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * ExtendedBufferedReader
+ *
+ * A special reader decorater which supports more
+ * sophisticated access to the underlying reader object.
+ * 
+ * In particular the reader supports a look-ahead option,
+ * which allows you to see the next char returned by
+ * next().
+ * Furthermore the skip-method supports skipping until
+ * (but excluding) a given char. Similar functionality
+ * is supported by the reader as well.
+ * 
+ */
+class ExtendedBufferedReader extends BufferedReader  {
+
+  
+  /** the end of stream symbol */
+  public static final int END_OF_STREAM = -1;
+  /** undefined state for the lookahead char */
+  public static final int UNDEFINED = -2;
+  
+  /** the lookahead chars */
+  private int lookaheadChar = UNDEFINED;
+  /** the last char returned */
+  private int lastChar = UNDEFINED;
+  /** the line counter */
+  private int lineCounter = 0;
+  private CharBuffer line = new CharBuffer();
+  
+  /**
+   * Created extended buffered reader using default buffer-size
+   *
+   */
+  public ExtendedBufferedReader(Reader r) {
+    super(r);
+    /* note uh: do not fetch the first char here,
+     *          because this might block the method!
+     */
+  }
+    
+  /**
+   * Create extended buffered reader using the given buffer-size
+   */
+  public ExtendedBufferedReader(Reader r, int bufSize) {
+    super(r, bufSize);
+    /* note uh: do not fetch the first char here,
+     *          because this might block the method!
+     */
+  }
+  
+  /**
+   * Reads the next char from the input stream.
+   * @return the next char or END_OF_STREAM if end of stream has been reached.
+   */
+  public int read() throws IOException {
+    // initalize the lookahead
+    if (lookaheadChar == UNDEFINED) {
+      lookaheadChar = super.read();
+    }
+    lastChar = lookaheadChar;
+    if (super.ready()) {
+      lookaheadChar = super.read();
+    } else {
+      lookaheadChar = UNDEFINED;
+    }
+    if (lastChar == '\n') {
+      lineCounter++;
+    } 
+    return lastChar;
+  }
+  
+  /**
+   * Returns the last read character again.
+   * 
+   * @return the last read char or UNDEFINED
+   */
+  public int readAgain() {
+    return lastChar;  
+  }
+  
+  /**
+   * Non-blocking reading of len chars into buffer buf starting
+   * at bufferposition off.
+   * 
+   * performs an iteratative read on the underlying stream
+   * as long as the following conditions hold:
+   *   - less than len chars have been read
+   *   - end of stream has not been reached
+   *   - next read is not blocking
+   * 
+   * @return nof chars actually read or END_OF_STREAM
+   */
+  public int read(char[] buf, int off, int len) throws IOException {
+    // do not claim if len == 0
+    if (len == 0) {
+      return 0;
+    } 
+    
+    // init lookahead, but do not block !!
+    if (lookaheadChar == UNDEFINED) {
+        if (ready()) {
+         lookaheadChar = super.read();
+        } else {
+          return -1;
+        }
+    }
+    // 'first read of underlying stream'
+    if (lookaheadChar == -1) {
+      return -1;
+    }
+    // continue until the lookaheadChar would block
+    int cOff = off;
+    while (len > 0 && ready()) {
+      if (lookaheadChar == -1) {
+        // eof stream reached, do not continue
+        return cOff - off;
+      } else {
+        buf[cOff++] = (char) lookaheadChar;
+        if (lookaheadChar == '\n') {
+          lineCounter++;
+        } 
+        lastChar = lookaheadChar;
+        lookaheadChar = super.read();
+        len--;
+      }
+    }
+    return cOff - off;
+  }
+ 
+ /**
+  * Reads all characters up to (but not including) the given character.
+  * 
+  * @param c the character to read up to
+  * @return the string up to the character <code>c</code>
+  * @throws IOException
+  */
+ public String readUntil(char c) throws IOException {
+   if (lookaheadChar == UNDEFINED) {
+     lookaheadChar = super.read();
+   }
+   line.clear(); // reuse
+   while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) {
+     line.append((char) lookaheadChar);
+     if (lookaheadChar == '\n') {
+       lineCounter++;
+     } 
+     lastChar = lookaheadChar;
+     lookaheadChar = super.read();
+   }
+   return line.toString();    
+ }
+ 
+ /**
+  * @return A String containing the contents of the line, not 
+  *         including any line-termination characters, or null 
+  *         if the end of the stream has been reached
+  */
+  public String readLine() throws IOException {
+    
+    if (lookaheadChar == UNDEFINED) {
+      lookaheadChar = super.read(); 
+    }
+    
+    line.clear(); //reuse
+    
+    // return null if end of stream has been reached
+    if (lookaheadChar == END_OF_STREAM) {
+      return null;
+    }
+    // do we have a line termination already
+    char laChar = (char) lookaheadChar;
+    if (laChar == '\n' || laChar == '\r') {
+      lastChar = lookaheadChar;
+      lookaheadChar = super.read();
+      // ignore '\r\n' as well
+      if ((char) lookaheadChar == '\n') {
+        lastChar = lookaheadChar;
+        lookaheadChar = super.read();
+      }
+      lineCounter++;
+      return line.toString();
+    }
+    
+    // create the rest-of-line return and update the lookahead
+    line.append(laChar);
+    String restOfLine = super.readLine(); // TODO involves copying
+    lastChar = lookaheadChar;
+    lookaheadChar = super.read();
+    if (restOfLine != null) {
+      line.append(restOfLine);
+    }
+    lineCounter++;
+    return line.toString();
+  }
+  
+  /**
+   * Skips char in the stream
+   * 
+   * ATTENTION: invalidates the line-counter !!!!!
+   * 
+   * @return nof skiped chars
+   */
+  public long skip(long n) throws IllegalArgumentException, IOException  {
+    
+    if (lookaheadChar == UNDEFINED) {
+      lookaheadChar = super.read();   
+    }
+    
+    // illegal argument
+    if (n < 0) {
+      throw new IllegalArgumentException("negative argument not supported");  
+    }
+    
+    // no skipping
+    if (n == 0 || lookaheadChar == END_OF_STREAM) {
+      return 0;
+    } 
+    
+    // skip and reread the lookahead-char
+    long skiped = 0;
+    if (n > 1) {
+      skiped = super.skip(n - 1);
+    }
+    lookaheadChar = super.read();
+    // fixme uh: we should check the skiped sequence for line-terminations...
+    lineCounter = Integer.MIN_VALUE;
+    return skiped + 1;
+  }
+  
+  /**
+   * Skips all chars in the input until (but excluding) the given char
+   * 
+   * @param c
+   * @return
+   * @throws IllegalArgumentException
+   * @throws IOException
+   */
+  public long skipUntil(char c) throws IllegalArgumentException, IOException {
+    if (lookaheadChar == UNDEFINED) {
+      lookaheadChar = super.read();   
+    }
+    long counter = 0;
+    while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) {
+      if (lookaheadChar == '\n') {
+        lineCounter++;
+      } 
+      lookaheadChar = super.read();
+      counter++;
+    }
+    return counter;
+  }
+  
+  /**
+   * Returns the next char in the stream without consuming it.
+   * 
+   * Remember the next char read by read(..) will always be
+   * identical to lookAhead().
+   * 
+   * @return the next char (without consuming it) or END_OF_STREAM
+   */
+  public int lookAhead() throws IOException {
+    if (lookaheadChar == UNDEFINED) {
+      lookaheadChar = super.read();
+    }
+    return lookaheadChar;
+  }
+  
+  
+  /**
+   * Returns the nof line read
+   * ATTENTION: the skip-method does invalidate the line-number counter
+   * 
+   * @return the current-line-number (or -1)
+   */ 
+  public int getLineNumber() {
+    if (lineCounter > -1) {
+      return lineCounter;
+    } else {
+      return -1;
+    }
+  }
+  public boolean markSupported() {
+    /* note uh: marking is not supported, cause we cannot
+     *          see into the future...
+     */
+    return false;
+  }
+  
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfig.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfig.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfig.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.solr.internal.csv.writer;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * The CSVConfig is used to configure the CSV writer
+ *
+ * @author Martin van den Bemt
+ * @version $Id: $
+ */
+public class CSVConfig {
+
+    /** specifies if it is a fixed width csv file **/
+    private boolean fixedWidth;
+    /** list of fields; stays null until a field is added or the fields are set **/
+    private List fields;
+
+    /** Do not do any filling **/
+    public static final int FILLNONE = 0;
+    /** Fill content to the left. Mainly usable together with fixedWidth **/
+    public static final int FILLLEFT = 1;
+    /** Fill content to the right. Mainly usable together with fixedWidth **/
+    public static final int FILLRIGHT = 2;
+    
+    /** The fill pattern */
+    private int fill;
+    /** The fill char. Defaults to a space */
+    private char fillChar = ' ';
+    /** The separator character. Defaults to , */
+    private char delimiter = ',';
+    /** Should we ignore the delimiter. Defaults to false */
+    private boolean ignoreDelimiter = false;
+    /** the value delimiter. Defaults to " */
+    private char valueDelimiter = '"';
+    /** Should we ignore the value delimiter. Defaults to true */
+    private boolean ignoreValueDelimiter = true;
+    /** Specifies if we want to use a field header */
+    private boolean fieldHeader = false;
+    /** Specifies if the end of the line needs to be trimmed */
+    private boolean endTrimmed = false;
+
+    /**
+     * Creates a config with the default settings documented on the fields.
+     */
+    public CSVConfig() {
+        super();
+    }
+    
+    /**
+     * @return if the CSV file is fixedWidth
+     */
+    public boolean isFixedWidth() {
+        return fixedWidth;
+    }
+    
+    /**
+     * Specify if the CSV file is fixed width.
+     * Defaults to false
+     * @param fixedWidth the fixedwidth
+     */
+    public void setFixedWidth(boolean fixedWidth) {
+        this.fixedWidth = fixedWidth;
+    }
+    
+    /**
+     * Appends a field to the list of fields, creating the list on first use.
+     * @param field the field to add
+     */
+    public void addField(CSVField field) {
+        if (fields == null) {
+            fields = new ArrayList();
+        }
+        fields.add(field);
+    }
+    
+    /**
+     * Set the fields that should be used by the writer.
+     * This will overwrite currently added fields completely!
+     * @param csvFields the csvfields array. If null it will do nothing
+     */
+    public void setFields(CSVField[] csvFields) {
+        if (csvFields == null) {
+            return;
+        }
+        fields = new ArrayList(Arrays.asList(csvFields));
+    }
+    
+    /**
+     * Set the fields that should be used by the writer.
+     * This will overwrite currently added fields completely!
+     * @param csvField a collection with fields. If null it will do nothing
+     */
+    public void setFields(Collection csvField) {
+        if (csvField == null) {
+            return;
+        }
+        fields = new ArrayList(csvField);
+    }
+
+    /**
+     * @return an array with the known fields; an empty array when no
+     *         fields are specified
+     */
+    public CSVField[] getFields() {
+        CSVField[] csvFields = new CSVField[0];
+        if (fields != null) {
+            return (CSVField[]) fields.toArray(csvFields);
+        }
+        return csvFields;
+    }
+    
+    /**
+     * Looks up a field by its name.
+     * @param name the name to look for
+     * @return the first field whose name equals the given name, or null if
+     *         the name is null, no fields are known or nothing matches
+     */
+    public CSVField getField(String name) {
+        if (fields == null || name == null) {
+            return null;
+        }
+        for(int i = 0; i < fields.size(); i++) {
+            CSVField field = (CSVField) fields.get(i);
+            // addField/setFields accept null entries; skip them instead of failing.
+            if (field != null && name.equals(field.getName())) {
+                return field;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * @return the fill pattern.
+     */
+    public int getFill() {
+        return fill;
+    }
+
+    /**
+     * Set the fill pattern. Defaults to {@link #FILLNONE}
+     * <br/>Other options are : {@link #FILLLEFT} and {@link #FILLRIGHT}
+     * @param fill the fill pattern.
+     */
+    public void setFill(int fill) {
+        this.fill = fill;
+    }
+
+    /**
+     * 
+     * @return the fillchar. Defaults to a space.
+     */
+    public char getFillChar() {
+        return fillChar;
+    }
+
+    /**
+     * Set the fill char
+     * @param fillChar the fill char
+     */
+    public void setFillChar(char fillChar) {
+        this.fillChar = fillChar;
+    }
+
+    /**
+     * @return the delimiter used.
+     */
+    public char getDelimiter() {
+        return delimiter;
+    }
+
+    /**
+     * Set the delimiter to use
+     * @param delimiter the delimiter character.
+     */
+    public void setDelimiter(char delimiter) {
+        this.delimiter = delimiter;
+    }
+
+    /**
+     * @return if the writer should ignore the delimiter character.
+     */
+    public boolean isDelimiterIgnored() {
+        return ignoreDelimiter;
+    }
+
+    /**
+     * Specify if the writer should ignore the delimiter. 
+     * @param ignoreDelimiter defaults to false.
+     */
+    public void setIgnoreDelimiter(boolean ignoreDelimiter) {
+        this.ignoreDelimiter = ignoreDelimiter;
+    }
+
+    /**
+     * @return the value delimiter used. Defaults to "
+     */
+    public char getValueDelimiter() {
+        return valueDelimiter;
+    }
+
+    /**
+     * Set the value delimiter to use
+     * @param valueDelimiter the value delimiter character.
+     */
+    public void setValueDelimiter(char valueDelimiter) {
+        this.valueDelimiter = valueDelimiter;
+    }
+
+    /**
+     * @return if the writer should ignore the value delimiter character.
+     *         Defaults to true.
+     */
+    public boolean isValueDelimiterIgnored() {
+        return ignoreValueDelimiter;
+    }
+
+    /**
+     * Specify if the writer should ignore the value delimiter. 
+     * @param ignoreValueDelimiter true to ignore the value delimiter;
+     *        the initial value is true.
+     */
+    public void setIgnoreValueDelimiter(boolean ignoreValueDelimiter) {
+        this.ignoreValueDelimiter = ignoreValueDelimiter;
+    }
+
+    /**
+     * @return if a field header is used. Defaults to false
+     */
+    public boolean isFieldHeader() {
+        return fieldHeader;
+    }
+
+    /**
+     * Specify if you want to use a field header.
+     * @param fieldHeader true or false.
+     */
+    public void setFieldHeader(boolean fieldHeader) {
+        this.fieldHeader = fieldHeader;
+    }
+    
+    /**
+     * Identity based equality: null and objects of another type are never
+     * equal, everything else is delegated to {@link Object#equals(Object)}.
+     * TODO: compare the individual settings (fill, fields, ...) instead.
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    public boolean equals(Object obj) {
+        // Either condition alone rules the object out, hence || and not &&.
+        if (obj == null || !(obj instanceof CSVConfig)) {
+            return false;
+        }
+        return super.equals(obj);
+    }
+
+    /**
+     * Intended to create a config by guessing it from a stream.<br/>
+     * NOTE : not implemented yet. This method currently always returns null
+     * and neither reads nor closes the stream.
+     * @param inputStream the inputstream. 
+     * @return currently always null. 
+     */
+    public static CSVConfig guessConfig(InputStream inputStream) {
+        return null;
+    }
+
+    /**
+     * @return if the end of the line should be trimmed. Default is false.
+     */
+    public boolean isEndTrimmed() {
+        return endTrimmed;
+    }
+
+    /**
+     * Specify if the end of the line needs to be trimmed. Defaults to false.
+     * @param endTrimmed true if the end of the line needs to be trimmed
+     */
+    public void setEndTrimmed(boolean endTrimmed) {
+        this.endTrimmed = endTrimmed;
+    }
+
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.solr.internal.csv.writer;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+
+/**
+ * Tries to guess a config based on an InputStream.
+ *
+ * @author Martin van den Bemt
+ * @version $Id: $
+ */
+public class CSVConfigGuesser {
+
+    /** The stream to read */
+    private InputStream in;
+    /** 
+     * if the file has a field header (need this info, to be able to guess better)
+     * Defaults to false
+     */
+    private boolean hasFieldHeader = false;
+    /** The found config */
+    protected CSVConfig config;
+    
+    /**
+     * 
+     */
+    public CSVConfigGuesser() {
+        this.config = new CSVConfig();
+    }
+    
+    /**
+     * @param in the inputstream to guess from
+     */
+    public CSVConfigGuesser(InputStream in) {
+        this();
+        setInputStream(in);
+    }
+    
+    public void setInputStream(InputStream in) {
+        this.in = in;
+    }
+    
+    /**
+     * Allow override.
+     * @return the inputstream that was set.
+     */
+    protected InputStream getInputStream() {
+        return in;
+    }
+    
+    /**
+     * Guess the config based on the first 10 (or less when less available) 
+     * records of a CSV file.
+     * 
+     * @return the guessed config.
+     */
+    public CSVConfig guess() {
+        try {
+            // tralalal
+            BufferedReader bIn = new BufferedReader(new InputStreamReader((getInputStream())));
+            String[] lines = new String[10];
+            String line = null;
+            int counter = 0;
+            while ( (line = bIn.readLine()) != null && counter <= 10) {
+                lines[counter] = line;
+                counter++;
+            }
+            if (counter < 10) {
+                // remove nulls from the array, so we can skip the null checking.
+                String[] newLines = new String[counter];
+                System.arraycopy(lines, 0, newLines, 0, counter);
+                lines = newLines;
+            }
+            analyseLines(lines);
+        } catch(Exception e) {
+            e.printStackTrace();
+        } finally {
+            if (in != null) {
+                try {
+                    in.close();
+                } catch(Exception e) {
+                    // ignore exception.
+                }
+            }
+        }
+        CSVConfig conf = config;
+        // cleanup the config.
+        config = null;
+        return conf;
+    }
+    
+    protected void analyseLines(String[] lines) {
+        guessFixedWidth(lines);
+        guessFieldSeperator(lines);
+    }
+    
+    /**
+     * Guess if this file is fixedwidth.
+     * Just basing the fact on all lines being of the same length
+     * @param lines
+     */
+    protected void guessFixedWidth(String[] lines) {
+        int lastLength = 0;
+        // assume fixedlength.
+        config.setFixedWidth(true);
+        for (int i = 0; i < lines.length; i++) {
+            if (i == 0) {
+                lastLength = lines[i].length();
+            } else {
+                if (lastLength != lines[i].length()) {
+                    config.setFixedWidth(false);
+                }
+            }
+        }
+    }
+        
+
+    protected void guessFieldSeperator(String[] lines) {
+        if (config.isFixedWidth()) {
+            guessFixedWidthSeperator(lines);
+            return;
+        }
+        for (int i = 0; i < lines.length; i++) {
+        }
+    }
+    
+    protected void guessFixedWidthSeperator(String[] lines) {
+        // keep track of the fieldlength
+        int previousMatch = -1;
+        for (int i = 0; i < lines[0].length(); i++) {
+            char last = ' ';
+            boolean charMatches = true;
+            for (int j = 0; j < lines.length; j++) {
+                if (j == 0) {
+                    last = lines[j].charAt(i);
+                }
+                if (last != lines[j].charAt(i)) {
+                    charMatches = false;
+                    break;
+                } 
+            }
+            if (charMatches) {
+                if (previousMatch == -1) {
+                    previousMatch = 0;
+                }
+                CSVField field = new CSVField();
+                field.setName("field"+config.getFields().length+1);
+                field.setSize((i-previousMatch));
+                config.addField(field);
+            }
+        }
+    }
+    /**
+     * 
+     * @return if the field uses a field header. Defaults to false.
+     */
+    public boolean hasFieldHeader() {
+        return hasFieldHeader;
+    }
+
+    /**
+     * Specify if the CSV file has a field header
+     * @param hasFieldHeader true or false
+     */
+    public void setHasFieldHeader(boolean hasFieldHeader) {
+        this.hasFieldHeader = hasFieldHeader;
+    }
+    
+ 
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.solr.internal.csv.writer;
+
+
+/**
+ * 
+ * @author Martin van den Bemt
+ * @version $Id: $
+ */
+public class CSVField {
+
+    /** the name of the field */
+    private String name;
+    /** the size of the field */
+    private int size;
+    /** the fill pattern of the field */
+    private int fill;
+    /** becomes true as soon as a fill pattern is set on this field */
+    private boolean overrideFill;
+
+    /**
+     * Creates a field without a name and with size 0.
+     */
+    public CSVField() {
+    }
+
+    /**
+     * Creates a named field.
+     * @param name the name of the field
+     */
+    public CSVField(String name) {
+        setName(name);
+    }
+
+    /**
+     * Creates a named field of the given size.
+     * @param name the name of the field
+     * @param size the size of the field
+     */
+    public CSVField(String name, int size) {
+        this(name);
+        setSize(size);
+    }
+
+    /**
+     * @return the name of the field
+     */
+    public String getName() {
+        return this.name;
+    }
+    
+    /**
+     * Set the name of the field
+     * @param name the name
+     */
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    /**
+     * @return the size of the field
+     */
+    public int getSize() {
+        return this.size;
+    }
+
+    /**
+     * Set the size of the field.
+     * The size will be ignored when fixedwidth is set to false in the CSVConfig
+     * @param size the size of the field.
+     */
+    public void setSize(int size) {
+        this.size = size;
+    }
+
+    /**
+     * @return the fill pattern.
+     */
+    public int getFill() {
+        return this.fill;
+    }
+
+    /**
+     * Set the fill pattern of this field.
+     * Sets overrideFill to true.
+     * @param fill the fill pattern
+     */
+    public void setFill(int fill) {
+        this.fill = fill;
+        this.overrideFill = true;
+    }
+    
+    /**
+     * Does this field override fill ?
+     * 
+     * @return true once a fill pattern has been set on this field
+     */
+    public boolean overrideFill() {
+        return this.overrideFill;
+    }
+
+}

Added: lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java?rev=1306796&view=auto
==============================================================================
--- lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java (added)
+++ lucene/dev/branches/lucene3930/solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java Thu Mar 29 12:01:30 2012
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.solr.internal.csv.writer;
+
+import java.io.Writer;
+import java.util.Arrays;
+import java.util.Map;
+
+
+/**
+ * CSVWriter
+ *
+ * @author Martin van den Bemt
+ * @version $Id: $
+ */
+public class CSVWriter {
+
+    /** The CSV config **/
+    private CSVConfig config;
+    /** The writer **/
+    private Writer writer;
+
+    /**
+     * Creates a writer without a config; set one with
+     * {@link #setConfig(CSVConfig)} before writing records.
+     */
+    public CSVWriter() {
+    }
+    
+    /**
+     * @param config the CSVConfig to use
+     */
+    public CSVWriter(CSVConfig config) {
+        setConfig(config);
+    }
+
+    /**
+     * Writes one record as a single line, terminated by "\n".
+     * For every configured field the value stored in the map under the
+     * field name is formatted and appended; fields without a value are
+     * left empty. The delimiter is put between the fields unless the config
+     * ignores it, and trailing whitespace is removed when the config asks
+     * for a trimmed end. Exceptions raised while formatting or writing are
+     * printed and otherwise ignored.
+     * @param map the values of the record, keyed by field name
+     */
+    public void writeRecord(Map map) {
+        CSVField[] fields = config.getFields();
+        try {
+            StringBuffer sb = new StringBuffer();
+            for (int i = 0; i < fields.length; i++) {
+                Object o = map.get(fields[i].getName());
+                if (o != null) {
+                    String value = o.toString();
+                    value = writeValue(fields[i], value);
+                    sb.append(value);
+                }
+                // no delimiter after the last field.
+                if (!config.isDelimiterIgnored() && fields.length != (i+1)) {
+                    sb.append(config.getDelimiter());
+                }
+            }
+            if (config.isEndTrimmed()) {
+                // find the end of the content and cut the trailing whitespace in one go.
+                int end = sb.length();
+                while (end > 0 && Character.isWhitespace(sb.charAt(end - 1))) {
+                    end--;
+                }
+                sb.setLength(end);
+            }
+            sb.append("\n");
+            String line = sb.toString();
+            writer.write(line);
+        } catch(Exception e) {
+            // best effort: report the problem, the record is dropped.
+            e.printStackTrace();
+        }
+    }
+    
+    /**
+     * Formats a single value. Values are only changed for fixed width
+     * configs: they are filled up or cut to the size of the field and, unless
+     * the config ignores the value delimiter, wrapped in value delimiters.
+     * NOTE(review): the value delimiter is not applied when the config is
+     * not fixed width - confirm that this is intended.
+     * @param field the field the value belongs to
+     * @param value the value to format
+     * @return the formatted value
+     * @throws Exception declared so that overriding implementations may throw
+     */
+    protected String writeValue(CSVField field, String value) throws Exception {
+        if (config.isFixedWidth()) {
+            if (value.length() < field.getSize()) {
+                int fillPattern = config.getFill();
+                if (field.overrideFill()) {
+                    fillPattern = field.getFill();
+                }
+                StringBuffer sb = new StringBuffer();
+                int fillSize = (field.getSize() - value.length());
+                char[] fill = new char[fillSize];
+                Arrays.fill(fill, config.getFillChar());
+                if (fillPattern == CSVConfig.FILLLEFT) {
+                    sb.append(fill);
+                    sb.append(value);
+                    value = sb.toString();
+                } else {
+                    // defaults to fillpattern FILLRIGHT when fixedwidth is used
+                    sb.append(value);
+                    sb.append(fill);
+                    value = sb.toString();
+                }
+            } else if (value.length() > field.getSize()) {
+                // value too big..
+                value = value.substring(0, field.getSize());
+            }
+            if (!config.isValueDelimiterIgnored()) {
+                // add the value delimiter..
+                value = config.getValueDelimiter()+value+config.getValueDelimiter();
+            }
+        }
+        return value;
+    }
+
+    /**
+     * @return the CSVConfig or null if not present
+     */
+    public CSVConfig getConfig() {
+        return config;
+    }
+
+    /**
+     * Set the CSVConfig
+     * @param config the CSVConfig
+     */
+    public void setConfig(CSVConfig config) {
+        this.config = config;
+    }
+    
+    /**
+     * Set the writer to write the CSV file to.
+     * @param writer the writer.
+     */
+    public void setWriter(Writer writer) {
+        this.writer = writer;
+    }
+
+}



Mime
View raw message