commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From oglu...@apache.org
Subject svn commit: r430322 - in /jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv: CSVParser.java ExtendedBufferedReader.java
Date Thu, 10 Aug 2006 09:01:19 GMT
Author: oglueck
Date: Thu Aug 10 02:01:18 2006
New Revision: 430322

URL: http://svn.apache.org/viewvc?rev=430322&view=rev
Log:
This patch reduces the amount of intermediate garbage significantly.
PR: SANDBOX-166
Contributed by: Ortwin Glück
Reviewed by: Henri Yandell

Modified:
    jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
    jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/ExtendedBufferedReader.java

Modified: jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java?rev=430322&r1=430321&r2=430322&view=diff
==============================================================================
--- jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java (original)
+++ jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java Thu Aug 10 02:01:18 2006
@@ -19,7 +19,7 @@
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
-import java.util.Vector;
+import java.util.ArrayList;
 
 
 /**
@@ -63,12 +63,23 @@
   protected static final int TT_EOF = 1;
   /** Token with content when end of a line is reached. */
   protected static final int TT_EORECORD = 2;
+
+  /** Immutable empty String array. */
+  private static final String[] EMPTY_STRING_ARRAY = new String[0];
    
   // the input stream
   private ExtendedBufferedReader in;
 
   private CSVStrategy strategy;
   
+  // the following objects are shared to reduce garbage 
+  /** A record buffer for getLine(). Grows as necessary and is reused. */
+  private ArrayList record = new ArrayList();
+  private Token reusableToken = new Token();
+  private CharBuffer wsBuf = new CharBuffer();
+  private CharBuffer code = new CharBuffer(4);
+
+  
   /**
    * Token is an internal token representation.
    * 
@@ -76,16 +87,17 @@
    */
   class Token {
     /** Token type, see TT_xxx constants. */
-    int type;
+    int type = TT_INVALID;
     /** The content buffer. */
-    StringBuffer content;
+    CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH);
     /** Token ready flag: indicates a valid token with content (ready for the parser). */
     boolean isReady;
-    /** Initializes an empty token. */
-    Token() {
-      content = new StringBuffer(INITIAL_TOKEN_LENGTH);
-      type = TT_INVALID;
-      isReady = false;
+    
+    Token reset() {
+        content.clear();
+        type = TT_INVALID;
+        isReady = false;
+        return this;
     }
   }
   
@@ -160,7 +172,7 @@
    * @throws IOException on parse error or input read-failure
    */
   public String[][] getAllValues() throws IOException {
-    Vector records = new Vector();
+    ArrayList records = new ArrayList();
     String[] values;
     String[][] ret = null;
     while ((values = getLine()) != null)  {
@@ -211,35 +223,35 @@
    * @throws IOException on parse error or input read-failure
    */
   public String[] getLine() throws IOException {
-    Vector record = new Vector();
-    String[] ret = new String[0];
-    Token tkn;
-    while ((tkn = nextToken()).type == TT_TOKEN) {
-      record.add(tkn.content.toString());  
-    }
-    // did we reached eorecord or eof ?
-    switch (tkn.type) {
-      case TT_EORECORD:
-        record.add(tkn.content.toString());
-        break;
-      case TT_EOF:
-        if (tkn.isReady) {
-          record.add(tkn.content.toString());
-        } else {
-          ret = null;
+    String[] ret = EMPTY_STRING_ARRAY;
+    record.clear();
+    while (true) {
+        reusableToken.reset();
+        nextToken(reusableToken);
+        switch (reusableToken.type) {
+            case TT_TOKEN:
+                record.add(reusableToken.content.toString());
+                break;
+            case TT_EORECORD:
+                record.add(reusableToken.content.toString());
+                break;
+            case TT_EOF:
+                if (reusableToken.isReady) {
+                    record.add(reusableToken.content.toString());
+                } else {
+                    ret = null;
+                }
+                break;
+            case TT_INVALID:
+            default:
+                // error: throw IOException
+                throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
+            // unreachable: break;
         }
-        break;
-      case TT_INVALID:
-      default:
-        // error: throw IOException
-        throw new IOException(
-          "(line " + getLineNumber() 
-          + ") invalid parse sequence");
-        // unreachable: break;
+        if (reusableToken.type != TT_TOKEN) break;
     }
-    if (record.size() > 0) {
-      ret = new String[record.size()];
-      record.toArray(ret);
+    if (!record.isEmpty()) {
+      ret = (String[]) record.toArray(new String[record.size()]);
     }
     return ret;
   }
@@ -260,18 +272,26 @@
   //  the lexer(s)
   // ======================================================
  
+  /**
+   * Convenience method for <code>nextToken(null)</code>.
+   */
+  protected Token nextToken() throws IOException {
+      return nextToken(new Token());
+  }
+  
  /**
    * Returns the next token.
    * 
    * A token corresponds to a term, a record change or an
    * end-of-file indicator.
    * 
+   * @param tkn an existing Token object to reuse. The caller is responsible to initialize the
+   * Token.
    * @return the next token found
    * @throws IOException on stream access error
    */
-  protected Token nextToken() throws IOException {
-    Token tkn = new Token();
-    StringBuffer wsBuf = new StringBuffer();
+  protected Token nextToken(Token tkn) throws IOException {
+    wsBuf.clear(); // resuse
     
     // get the last read char (required for empty line detection)
     int lastChar = in.readAgain();
@@ -321,14 +341,14 @@
       if (!strategy.isCommentingDisabled() && c == strategy.getCommentStart()) {
         // ignore everything till end of line and continue (incr linecount)
         in.readLine();
-        tkn = nextToken();
+        tkn = nextToken(tkn.reset());
       } else if (c == strategy.getDelimiter()) {
         // empty token return TT_TOKEN("")
         tkn.type = TT_TOKEN;
         tkn.isReady = true;
       } else if (eol) {
         // empty token return TT_EORECORD("")
-        tkn.content.append("");
+        //noop: tkn.content.append("");
         tkn.type = TT_EORECORD;
         tkn.isReady = true;
       } else if (c == strategy.getEncapsulator()) {
@@ -336,14 +356,14 @@
         encapsulatedTokenLexer(tkn, c);
       } else if (isEndOfFile(c)) {
         // end of file return TT_EOF()
-        tkn.content.append("");
+        //noop: tkn.content.append("");
         tkn.type = TT_EOF;
         tkn.isReady = true;
       } else {
         // next token must be a simple token
         // add removed blanks when not ignoring whitespace chars...
         if (!strategy.getIgnoreLeadingWhitespaces()) {
-          tkn.content.append(wsBuf.toString());
+          tkn.content.append(wsBuf);
         }
         simpleTokenLexer(tkn, c);
       }
@@ -370,7 +390,7 @@
    * @throws IOException on stream access error
    */
   private Token simpleTokenLexer(Token tkn, int c) throws IOException {
-    StringBuffer wsBuf = new StringBuffer();
+    wsBuf.clear();
     while (!tkn.isReady) {
       if (isEndOfLine(c)) {
         // end of record
@@ -396,9 +416,8 @@
       } else {
         // prepend whitespaces (if we have)
         if (wsBuf.length() > 0) {
-          // for J2SDK 1.3 compatibility we use toString()
-          tkn.content.append(wsBuf.toString());
-          wsBuf.delete(0, wsBuf.length());
+          tkn.content.append(wsBuf);
+          wsBuf.clear();
         }
         tkn.content.append((char) c);
       }
@@ -508,7 +527,7 @@
     int ret = 0;
     // ignore 'u' (assume c==\ now) and read 4 hex digits
     c = in.read();
-    StringBuffer code = new StringBuffer(4);
+    code.clear();
     try {
       for (int i = 0; i < 4; i++) {
         c  = in.read();

Modified: jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/ExtendedBufferedReader.java
URL: http://svn.apache.org/viewvc/jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/ExtendedBufferedReader.java?rev=430322&r1=430321&r2=430322&view=diff
==============================================================================
--- jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/ExtendedBufferedReader.java (original)
+++ jakarta/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/ExtendedBufferedReader.java Thu Aug 10 02:01:18 2006
@@ -47,6 +47,8 @@
   private int lastChar = UNDEFINED;
   /** the line counter */
   private int lineCounter = 0;
+  private CharBuffer line = new CharBuffer();
+  
   /**
    * Created extended buffered reader using default buffer-size
    *
@@ -154,16 +156,16 @@
    if (lookaheadChar == UNDEFINED) {
      lookaheadChar = super.read();
    }
-   StringBuffer ret = new StringBuffer("");
+   line.clear(); // reuse
    while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) {
-     ret.append((char) lookaheadChar);
+     line.append((char) lookaheadChar);
      if (lookaheadChar == '\n') {
        lineCounter++;
      } 
      lastChar = lookaheadChar;
      lookaheadChar = super.read();
    }
-   return ret.toString();    
+   return line.toString();    
  }
  
  /**
@@ -177,7 +179,7 @@
       lookaheadChar = super.read(); 
     }
     
-    StringBuffer ret = new StringBuffer("");
+    line.clear(); //reuse
     
     // return null if end of stream has been reached
     if (lookaheadChar == END_OF_STREAM) {
@@ -194,19 +196,19 @@
         lookaheadChar = super.read();
       }
       lineCounter++;
-      return ret.toString();
+      return line.toString();
     }
     
     // create the rest-of-line return and update the lookahead
-    ret.append(String.valueOf(laChar));
-    String restOfLine = super.readLine();
+    line.append(laChar);
+    String restOfLine = super.readLine(); // TODO involves copying
     lastChar = lookaheadChar;
     lookaheadChar = super.read();
     if (restOfLine != null) {
-      ret.append(restOfLine);
+      line.append(restOfLine);
     }
     lineCounter++;
-    return ret.toString();
+    return line.toString();
   }
   
   /**



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message