jakarta-regexp-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@locus.apache.org
Subject cvs commit: jakarta-regexp/src/java/org/apache/regexp CharacterArrayCharacterIterator.java CharacterIterator.java ReaderCharacterIterator.java StreamCharacterIterator.java StringCharacterIterator.java RE.java RECompiler.java
Date Sun, 14 May 2000 21:04:18 GMT
jon         00/05/14 14:04:18

  Modified:    src/java/org/apache/regexp RE.java RECompiler.java
  Added:       src/java/org/apache/regexp
                        CharacterArrayCharacterIterator.java
                        CharacterIterator.java ReaderCharacterIterator.java
                        StreamCharacterIterator.java
                        StringCharacterIterator.java
  Log:
  There are three patches -
  RECompiler.compile() - copy on return
  Think of:
  // -------------------------
  RECompiler rc = new RECompiler();
  REProgram pr1 = rc.compile("...");
  RE re1 = new RE(pr1);
  REProgram pr2 = rc.compile("...");
  RE re2 = new RE(pr2);
  // -------------------------
  re1 and re2 use different programs now
  
  RE.matchNodes() - BOL and EOL cases
  if RE.MATCH_MULTILINE is specified.
  - Think of RETest.java which prints
  MATCH!!!: ......
  two times while in the older version only one time.
  
  RE.java also contains an extension which allows parsing
  InputStreams, Readers and char arrays in addition to Strings.
  All needed modifications are only in the RE class.
  
  Ales Novak <ales.novak@netbeans.com>
  
  Revision  Changes    Path
  1.2       +54 -23    jakarta-regexp/src/java/org/apache/regexp/RE.java
  
  Index: RE.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RE.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- RE.java	2000/04/27 01:22:33	1.1
  +++ RE.java	2000/05/14 21:04:17	1.2
  @@ -347,7 +347,7 @@
    * @see RECompiler
    *
    * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
  - * @version $Id: RE.java,v 1.1 2000/04/27 01:22:33 jon Exp $
  + * @version $Id: RE.java,v 1.2 2000/05/14 21:04:17 jon Exp $
    */
   public class RE
   {
  @@ -437,11 +437,13 @@
       static final int offsetNext   = 2;            // Next index offset (third char)
       static final int nodeSize     = 3;            // Node size (in chars)
   
  +    /** Line Separator */
  +    static final String NEWLINE = System.getProperty("line.separator");
  +
       // State of current program
       REProgram program;                            // Compiled regular expression 'program'
  -    String search;                                // The string being matched against
  +    CharacterIterator search;                                // The string being matched against
       int idx;                                      // Current index in string being searched
  -    int len;                                      // Length of string being searched
       int matchFlags;                               // Match behaviour flags
   
       // Parenthesized subexpressions
  @@ -842,9 +844,6 @@
           // Our current place in the string
           int idx = idxStart;
   
  -        // Length of string to match against
  -        int len = this.len;
  -
           // Loop while node is valid
           int next, opcode, opdata;
           int idxNew;
  @@ -963,7 +962,7 @@
                           int l = e - s;
   
                           // If there's not enough input left, give up.
  -                        if (idx + l > len)
  +                        if (search.isEnd(idx + l))
                           {
                               return -1;
                           }
  @@ -1003,9 +1002,10 @@
                           if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
                           {
                               // If not at start of line, give up
  -                            if (idx <= 0 || search.charAt(idx - 1) != '\n') // JWL - bugbug: isLineTerminator... ???
  -                            {
  +                            if (idx <= 0 || !isNewline(idx - 1)) {
                                   return -1;
  +                            } else {
  +                                break;
                               }
                           }
                           return -1;
  @@ -1015,15 +1015,16 @@
                   case OP_EOL:
   
                       // If we're not at the end of string
  -                    if (len != 0 && idx != len)
  +                    if (!search.isEnd(0) && !search.isEnd(idx))
                       {
                           // If we're multi-line matching
                           if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
                           {
                               // Give up if we're not at the end of a line
  -                            if (search.charAt(idx) != '\n')
  -                            {
  +                            if (! isNewline(idx)) {
                                   return -1;
  +                            } else {
  +                                break;
                               }
                           }
                           return -1;
  @@ -1040,7 +1041,7 @@
                           case E_BOUND:
                               {
                                   char cLast = ((idx == getParenStart(0)) ? '\n' : search.charAt(idx - 1));
  -                                char cNext = ((idx == len) ? '\n' : search.charAt(idx));
  +                                char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx));
                                   if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND))
                                   {
                                       return -1;
  @@ -1057,7 +1058,7 @@
                           case E_NSPACE:
   
                               // Give up if out of input
  -                            if (idx >= len)
  +                            if (search.isEnd(idx))
                               {
                                   return -1;
                               }
  @@ -1100,7 +1101,7 @@
                   case OP_ANY:
   
                       // Match anything but a newline
  -                    if (idx >= len || search.charAt(idx++) == '\n')
  +                    if (search.isEnd(idx) || search.charAt(idx++) == '\n')
                       {
                           return -1;
                       }
  @@ -1109,7 +1110,7 @@
                   case OP_ATOM:
                       {
                           // Match an atom value
  -                        if (idx >= len)
  +                        if (search.isEnd(idx))
                           {
                               return -1;
                           }
  @@ -1119,7 +1120,7 @@
                           int startAtom = node + nodeSize;
   
                           // Give up if not enough input remains to have a match
  -                        if (len - idx < lenAtom)
  +                        if (search.isEnd(lenAtom + idx - 1))
                           {
                               return -1;
                           }
  @@ -1151,7 +1152,7 @@
                   case OP_POSIXCLASS:
                       {
                           // Out of input?
  -                        if (idx >= len)
  +                        if (search.isEnd(idx))
                           {
                               return -1;
                           }
  @@ -1292,7 +1293,7 @@
                   case OP_ANYOF:
                       {
                           // Out of input?
  -                        if (idx >= len)
  +                        if (search.isEnd(idx))
                           {
                               return -1;
                           }
  @@ -1444,8 +1445,20 @@
        * @param i Index to start searching at
        * @return True if string matched
        */
  -    public boolean match(String search, int i)
  +    public boolean match(String search, int i) 
       {
  +        return match(new StringCharacterIterator(search), i);
  +    }
  +
  +    /**
  +     * Matches the current regular expression program against a character array,
  +     * starting at a given index.
  +     * @param search String to match against
  +     * @param i Index to start searching at
  +     * @return True if string matched
  +     */
  +    public boolean match(CharacterIterator search, int i)
  +    {
           // There is no compiled program to search with!
           if (program == null)
           {
  @@ -1456,13 +1469,12 @@
   
           // Save string to search
           this.search = search;
  -        this.len = search.length();
   
           // Can we optimize the search by looking for a prefix string?
           if (program.prefix == null)
           {
               // Unprefixed matching must try for a match at each character
  -            for ( ; i <= len; i++)
  +            for ( ;! search.isEnd(i - 1); i++)
               {
                   // Try a match at index i
                   if (matchAt(i))
  @@ -1476,7 +1488,7 @@
           {
               // Prefix-anchored matching is possible
               char[] prefix = program.prefix;
  -            for ( ; i + prefix.length <= len; i++)
  +            for ( ;! search.isEnd(i + prefix.length - 1); i++)
               {
                   // If the first character of the prefix matches
                   if (search.charAt(i) == prefix[0])
  @@ -1700,5 +1712,24 @@
           String[] ret = new String[v.size()];
           v.copyInto(ret);
           return ret;
  +    }
  +
  +    /** @return true if at the i-th position in the 'search' a newline ends */
  +    private boolean isNewline(int i) {
  +
  +        if (i < NEWLINE.length() - 1) {
  +            return false;
  +        }
  +
  +        if (search.charAt(i) == '\n') {
  +            return true;
  +        }
  +
  +        for (int j = NEWLINE.length() - 1; j >= 0; j--, i--) {
  +            if (NEWLINE.charAt(j) != search.charAt(i)) {
  +                return false;
  +            }
  +        }
  +        return true;
       }
   }
  
  
  
  1.2       +4 -2      jakarta-regexp/src/java/org/apache/regexp/RECompiler.java
  
  Index: RECompiler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RECompiler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- RECompiler.java	2000/04/27 01:22:33	1.1
  +++ RECompiler.java	2000/05/14 21:04:17	1.2
  @@ -71,7 +71,7 @@
    * @see recompile
    *
    * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
  - * @version $Id: RECompiler.java,v 1.1 2000/04/27 01:22:33 jon Exp $
  + * @version $Id: RECompiler.java,v 1.2 2000/05/14 21:04:17 jon Exp $
    */
   public class RECompiler
   {
  @@ -1291,7 +1291,9 @@
           }
   
           // Return the result
  -        return new REProgram(instruction, lenInstruction);
  +        char[] ins = new char[lenInstruction];
  +        System.arraycopy(instruction, 0, ins, 0, lenInstruction);
  +        return new REProgram(ins);
       }
   
       /**
  
  
  
  1.1                  jakarta-regexp/src/java/org/apache/regexp/CharacterArrayCharacterIterator.java
  
  Index: CharacterArrayCharacterIterator.java
  ===================================================================
  package org.apache.regexp;
  
  /*
   * ====================================================================
   * 
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   */ 
  
  /**
   * A CharacterIterator backed by a region of a character array.
   *
   * All positions accepted by this class are relative to the start of the
   * region, i.e. position 0 refers to <tt>src[off]</tt>.
   *
   * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   */
  public final class CharacterArrayCharacterIterator implements CharacterIterator
  {
      /** encapsulated array (referenced, not copied) */
      private final char[] src;
      /** offset in the char array */
      private final int off;
      /** used portion of the array */
      private final int len;

      /**
       * @param src array holding the characters; it is referenced, not copied
       * @param off index in <tt>src</tt> of the first character of the region
       * @param len number of characters in the region
       */
      public CharacterArrayCharacterIterator(char[] src, int off, int len)
      {
          this.src = src;
          this.off = off;
          this.len = len;
      }

      /**
       * @param offset position of the first character, relative to the region start
       * @param length number of characters to return
       * @return a new string of <tt>length</tt> characters starting at <tt>offset</tt>
       */
      public String substring(int offset, int length)
      {
          return new String(src, off + offset, length);
      }

      /**
       * @param offset position of the first character, relative to the region start
       * @return a new string holding the characters from <tt>offset</tt> to the
       *         end of the region
       */
      public String substring(int offset)
      {
          // Only (len - offset) characters remain after skipping 'offset' of them;
          // passing 'len' here would run past the end of the used region.
          return new String(src, off + offset, len - offset);
      }

      /**
       * @param pos position relative to the region start
       * @return a character at the specified position.
       */
      public char charAt(int pos)
      {
          return src[off + pos];
      }

      /**
       * @param pos position relative to the region start
       * @return <tt>true</tt> iff the specified index is at or after the end of
       *         the region
       */
      public boolean isEnd(int pos)
      {
          return (pos >= len);
      }
  }
  
  
  
  1.1                  jakarta-regexp/src/java/org/apache/regexp/CharacterIterator.java
  
  Index: CharacterIterator.java
  ===================================================================
  package org.apache.regexp;
  
  /*
   * ====================================================================
   * 
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   */ 
  
  /**
   * Common, index-based view over different kinds of character sources
   * (String, InputStream, ...).
   *
   * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   */
  public interface CharacterIterator
  {
      /**
       * @param pos zero-based position in the character source
       * @return a character at the specified position.
       */
      char charAt(int pos);

      /**
       * @param pos zero-based position in the character source
       * @return <tt>true</tt> iff the specified index is after the end of the
       *         character stream
       */
      boolean isEnd(int pos);

      /**
       * @param offset position of the first character of the substring
       * @return the substring starting at <tt>offset</tt>
       */
      String substring(int offset);

      /**
       * NOTE(review): implementations should agree on whether the second
       * argument is a character count or an end index - confirm against callers.
       *
       * @param offset position of the first character of the substring
       * @param length second bound of the substring (see note above)
       * @return a substring
       */
      String substring(int offset, int length);
  }
  
  
  
  1.1                  jakarta-regexp/src/java/org/apache/regexp/ReaderCharacterIterator.java
  
  Index: ReaderCharacterIterator.java
  ===================================================================
  package org.apache.regexp;
  
  /*
   * ====================================================================
   * 
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   */ 
  
  import java.io.Reader;
  import java.io.IOException;
  
  /**
   * A CharacterIterator that reads its characters lazily from a Reader.
   * Characters are pulled from the reader on demand and appended to an
   * internal buffer, so positions that were already read stay accessible.
   *
   * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   */
  public final class ReaderCharacterIterator implements CharacterIterator
  {
      /** Underlying reader */
      private final Reader reader;

      /** Buffer of read chars */
      private final StringBuffer buff;

      /** Set once the reader has reported end of stream (the reader itself is not closed here) */
      private boolean closed;

      /** @param reader a Reader, which is parsed */
      public ReaderCharacterIterator(Reader reader)
      {
          this.reader = reader;
          this.buff = new StringBuffer(512);
          this.closed = false;
      }

      /**
       * NOTE(review): <tt>length</tt> is used as a count when deciding how far to
       * read (<tt>offset + length</tt>) but is handed to String.substring as the
       * exclusive end index - confirm which contract callers expect.
       *
       * @return a substring
       * @throws StringIndexOutOfBoundsException if reading fails or the indices
       *         are outside the buffered text
       */
      public String substring(int offset, int length)
      {
          try
          {
              ensure(offset + length);
              return buff.toString().substring(offset, length);
          }
          catch (IOException e)
          {
              // I/O failures are reported through the unchecked exception the interface allows
              throw new StringIndexOutOfBoundsException(e.getMessage());
          }
      }

      /**
       * Reads the rest of the reader and returns everything from <tt>offset</tt> on.
       *
       * @return a substring
       * @throws StringIndexOutOfBoundsException if reading fails or <tt>offset</tt>
       *         is outside the buffered text
       */
      public String substring(int offset)
      {
          try
          {
              readAll();
              return buff.toString().substring(offset);
          }
          catch (IOException e)
          {
              throw new StringIndexOutOfBoundsException(e.getMessage());
          }
      }

      /**
       * @return a character at the specified position.
       * @throws StringIndexOutOfBoundsException if reading fails or <tt>pos</tt>
       *         is outside the available text
       */
      public char charAt(int pos)
      {
          try
          {
              ensure(pos);
              return buff.charAt(pos);
          }
          catch (IOException e)
          {
              throw new StringIndexOutOfBoundsException(e.getMessage());
          }
      }

      /**
       * @return <tt>true</tt> iff the specified index is after the end of the
       *         character stream
       * @throws StringIndexOutOfBoundsException if reading fails
       */
      public boolean isEnd(int pos)
      {
          if (buff.length() > pos)
          {
              // Already buffered, no need to touch the reader
              return false;
          }
          else
          {
              try
              {
                  ensure(pos);
                  return (buff.length() <= pos);
              }
              catch (IOException e)
              {
                  throw new StringIndexOutOfBoundsException(e.getMessage());
              }
          }
      }

      /**
       * Reads up to n characters from the reader and appends them to the buffer.
       * Stops early, and marks this iterator as exhausted, on end of stream.
       *
       * @param n number of characters wanted
       * @return number of characters actually appended
       */
      private int read(int n) throws IOException
      {
          if (closed)
          {
              return 0;
          }

          char[] c = new char[n];
          int count = 0;
          int read = 0;

          do
          {
              // Ask only for what is still missing; requesting the whole array on
              // every pass could consume more than n characters in total.
              read = reader.read(c, 0, n - count);
              if (read < 0) // EOF
              {
                  closed = true;
                  break;
              }
              count += read;
              buff.append(c, 0, read);
          }
          while (count < n);

          return count;
      }

      /** Reads rest of the stream. */
      private void readAll() throws IOException
      {
          while(! closed)
          {
              read(1000);
          }
      }

      /** Reads chars up to the idx (inclusive), unless the end was already reached */
      private void ensure(int idx) throws IOException
      {
          if (closed)
          {
              return;
          }

          if (idx < buff.length())
          {
              return;
          }
          // idx + 1 characters are needed in total; read the missing ones
          read(idx + 1 - buff.length());
      }
  }
  
  
  
  1.1                  jakarta-regexp/src/java/org/apache/regexp/StreamCharacterIterator.java
  
  Index: StreamCharacterIterator.java
  ===================================================================
  package org.apache.regexp;
  
  /*
   * ====================================================================
   * 
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   */ 
  
  import java.io.InputStream;
  import java.io.IOException;
  
  /**
   * A CharacterIterator that reads its characters lazily from an InputStream.
   * Bytes are pulled from the stream on demand, each byte is stored as one
   * char with the same numeric value (no charset decoding is applied), and
   * everything read so far is kept in an internal buffer.
   *
   * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   */
  public final class StreamCharacterIterator implements CharacterIterator
  {
      /** Underlying is */
      private final InputStream is;

      /** Buffer of read chars */
      private final StringBuffer buff;

      /** Set once the stream has reported end of stream (the stream itself is not closed here) */
      private boolean closed;

      /** @param is an InputStream, which is parsed */
      public StreamCharacterIterator(InputStream is)
      {
          this.is = is;
          this.buff = new StringBuffer(512);
          this.closed = false;
      }

      /**
       * NOTE(review): <tt>length</tt> is used as a count when deciding how far to
       * read (<tt>offset + length</tt>) but is handed to String.substring as the
       * exclusive end index - confirm which contract callers expect.
       *
       * @return a substring
       * @throws StringIndexOutOfBoundsException if reading fails or the indices
       *         are outside the buffered text
       */
      public String substring(int offset, int length)
      {
          try
          {
              ensure(offset + length);
              return buff.toString().substring(offset, length);
          }
          catch (IOException e)
          {
              // I/O failures are reported through the unchecked exception the interface allows
              throw new StringIndexOutOfBoundsException(e.getMessage());
          }
      }

      /**
       * Reads the rest of the stream and returns everything from <tt>offset</tt> on.
       *
       * @return a substring
       * @throws StringIndexOutOfBoundsException if reading fails or <tt>offset</tt>
       *         is outside the buffered text
       */
      public String substring(int offset)
      {
          try
          {
              readAll();
              return buff.toString().substring(offset);
          }
          catch (IOException e)
          {
              throw new StringIndexOutOfBoundsException(e.getMessage());
          }
      }


      /**
       * @return a character at the specified position.
       * @throws StringIndexOutOfBoundsException if reading fails or <tt>pos</tt>
       *         is outside the available text
       */
      public char charAt(int pos)
      {
          try
          {
              ensure(pos);
              return buff.charAt(pos);
          }
          catch (IOException e)
          {
              throw new StringIndexOutOfBoundsException(e.getMessage());
          }
      }

      /**
       * @return <tt>true</tt> iff the specified index is after the end of the
       *         character stream
       * @throws StringIndexOutOfBoundsException if reading fails
       */
      public boolean isEnd(int pos)
      {
          if (buff.length() > pos)
          {
              // Already buffered, no need to touch the stream
              return false;
          }
          else
          {
              try
              {
                  ensure(pos);
                  return (buff.length() <= pos);
              }
              catch (IOException e)
              {
                  throw new StringIndexOutOfBoundsException(e.getMessage());
              }
          }
      }

      /**
       * Reads up to n characters from the stream and appends them to the buffer.
       * Stops early, and marks this iterator as exhausted, on end of stream.
       *
       * @param n number of characters wanted
       * @return number of characters actually appended
       */
      private int read(int n) throws IOException
      {
          if (closed)
          {
              return 0;
          }

          // Count what is really appended so the result is exact both for a
          // complete read and for an early end of stream.
          int count = 0;
          while (count < n)
          {
              int c = is.read();
              if (c < 0) // EOF
              {
                  closed = true;
                  break;
              }
              buff.append((char) c);
              count++;
          }
          return count;
      }

      /** Reads rest of the stream. */
      private void readAll() throws IOException
      {
          while(! closed)
          {
              read(1000);
          }
      }

      /** Reads chars up to the idx (inclusive), unless the end was already reached */
      private void ensure(int idx) throws IOException
      {
          if (closed)
          {
              return;
          }

          if (idx < buff.length())
          {
              return;
          }

          // idx + 1 characters are needed in total; read the missing ones
          read(idx + 1 - buff.length());
      }
  }
  
  
  
  1.1                  jakarta-regexp/src/java/org/apache/regexp/StringCharacterIterator.java
  
  Index: StringCharacterIterator.java
  ===================================================================
  package org.apache.regexp;
  
  /*
   * ====================================================================
   * 
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Jakarta-Regexp", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   */ 
  
  /**
   * A {@link CharacterIterator} backed by an in-memory String; every
   * operation delegates directly to the wrapped String.
   *
   * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   */
  public final class StringCharacterIterator implements CharacterIterator
  {
      /** The wrapped String all operations delegate to. */
      private final String text;

      /**
       * Wraps the given String.
       *
       * @param src the String to expose through this iterator
       */
      public StringCharacterIterator(String src)
      {
          this.text = src;
      }

      /**
       * Returns a portion of the wrapped String.
       * Both arguments are handed unchanged to
       * <tt>String.substring(int, int)</tt>, which interprets its second
       * argument as an exclusive end index, so despite its name
       * <tt>length</tt> acts as an end index here.
       *
       * @param offset index of the first character to include
       * @param length value passed as the second argument of
       *        <tt>String.substring(int, int)</tt>
       * @return the selected substring
       */
      public String substring(int offset, int length)
      {
          return text.substring(offset, length);
      }

      /**
       * Returns the tail of the wrapped String starting at the given index.
       *
       * @param offset index of the first character to include
       * @return the substring from <tt>offset</tt> to the end of the String
       */
      public String substring(int offset)
      {
          return text.substring(offset);
      }

      /**
       * Returns the character at the given position of the wrapped String.
       *
       * @param pos zero-based character index
       * @return the character at <tt>pos</tt>
       */
      public char charAt(int pos)
      {
          return text.charAt(pos);
      }

      /**
       * Tells whether the given index lies at or beyond the end of the
       * wrapped String.
       *
       * @param pos zero-based index to test
       * @return <tt>true</tt> if <tt>pos</tt> is greater than or equal to the
       *         String's length, <tt>false</tt> otherwise
       */
      public boolean isEnd(int pos)
      {
          return text.length() <= pos;
      }
  }
  
  
  

Mime
View raw message