jakarta-oro-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d..@apache.org
Subject cvs commit: jakarta-oro/src/java/org/apache/oro/text/regex Util.java
Date Tue, 19 Feb 2002 04:54:29 GMT
dfs         02/02/18 20:54:29

  Modified:    .        CHANGES CONTRIBUTORS
               src/java/examples jdfix.java
               src/java/org/apache/oro/text/perl Perl5Util.java
               src/java/org/apache/oro/text/regex Util.java
  Log:
  Added missing int Perl5Util.substitute(...) method and fixed some documentation.
  
  Revision  Changes    Path
  1.24      +6 -1      jakarta-oro/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/CHANGES,v
  retrieving revision 1.23
  retrieving revision 1.24
  diff -u -r1.23 -r1.24
  --- CHANGES	1 Feb 2002 09:38:19 -0000	1.23
  +++ CHANGES	19 Feb 2002 04:54:29 -0000	1.24
  @@ -1,6 +1,11 @@
  -$Id: CHANGES,v 1.23 2002/02/01 09:38:19 dfs Exp $
  +$Id: CHANGES,v 1.24 2002/02/19 04:54:29 dfs Exp $
   
   Version 2.0.x
  +
  +o Removed some leftover references to OROMatcher in the Perl5Util javadocs.
  +
  +o Added an int substitute(...) method to Perl5Util to correspond to
  +  the similar method added to org.apache.oro.text.regex.Util in v2.0.3
   
   o Removed ant and support jars from distribution and moved build.xml to
     top level directory.  From now on, you must have ant installed on your
  
  
  
  1.5       +4 -1      jakarta-oro/CONTRIBUTORS
  
  Index: CONTRIBUTORS
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/CONTRIBUTORS,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- CONTRIBUTORS	17 May 2001 00:44:41 -0000	1.4
  +++ CONTRIBUTORS	19 Feb 2002 04:54:29 -0000	1.5
  @@ -1,4 +1,4 @@
  -$Id: CONTRIBUTORS,v 1.4 2001/05/17 00:44:41 dfs Exp $
  +$Id: CONTRIBUTORS,v 1.5 2002/02/19 04:54:29 dfs Exp $
   
   Daniel Savarese <dfs at savarese.org> is the original author of the
   OROMatcher, PerlTools, AwkTools, and TextTools packages that became
  @@ -14,3 +14,6 @@
   Mark Murphy <markm at tyrell.com> has contributed performance
   improvements to Perl5Substitution as well as adding support for
   \UuLlE and escaping of $.
  +
  +Michael Davey <michael.davey at sun.com> fixed some documentation and
  +added a missing int substitute(...) method to Perl5Util.
  
  
  
  1.5       +17 -10    jakarta-oro/src/java/examples/jdfix.java
  
  Index: jdfix.java
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/src/java/examples/jdfix.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- jdfix.java	20 May 2001 23:55:15 -0000	1.4
  +++ jdfix.java	19 Feb 2002 04:54:29 -0000	1.5
  @@ -56,7 +56,7 @@
    */
   
   /*
  - * $Id: jdfix.java,v 1.4 2001/05/20 23:55:15 dfs Exp $
  + * $Id: jdfix.java,v 1.5 2002/02/19 04:54:29 dfs Exp $
    */
   import java.io.*;
   
  @@ -77,8 +77,10 @@
      * Notice that the Java program is only so much longer because of all
      * of the I/O exception handling and InputStream creation.  The core
      * while loop is EXACTLY the same length as the while loop in the Perl
  -   * script.  This program uses DataInputStream, readLine(), and 
  -   * PrintStream for JDK 1.0.2 compatibility.
  +   * script.  The number of substitutions performed is printed to standard
  +   * output as additional information.  Note, this is not an efficient way
  +   * to do this job; it is better to first read the entire file into a
  +   * character array.
      * <p>
      * This is a simple program that takes a javadoc generated HTML file as
      * input and produces as output the same HTML file, except with a white
  @@ -106,10 +108,12 @@
       BufferedReader input = null;
       PrintWriter output    = null;
       Perl5Util perl;
  +    StringBuffer result = new StringBuffer();
  +    int numSubs = 0;
   
       if(args.length < 2) {
         System.err.println("Usage: jdfix input output");
  -      System.exit(1);
  +      return;
       }
   
       try {
  @@ -118,7 +122,7 @@
       } catch(IOException e) {
         System.err.println("Error opening input file: " + args[0]);
         e.printStackTrace();
  -      System.exit(1);
  +      return;
       }
   
       try {
  @@ -127,20 +131,23 @@
       } catch(IOException e) {
         System.err.println("Error opening output file: " + args[1]);
         e.printStackTrace();
  -      System.exit(1);
  +      return;
       } 
   
       perl = new Perl5Util();
   
       try {
         while((line = input.readLine()) != null) {
  -	line = perl.substitute("s/<body>/<body bgcolor=\"#ffffff\">/", line);
  -	output.println(line);
  +	numSubs+=perl.substitute(result,
  +				 "s/<body>/<body bgcolor=\"#ffffff\">/", line);
  +	result.append('\n');
         }
  +      output.print(result.toString());
  +      System.out.println("Substitutions made: " + numSubs);
       } catch(IOException e) {
         System.err.println("Error reading from input: " + args[1]);
         e.printStackTrace();
  -      System.exit(1);
  +      return;
       } finally {
         try {
   	input.close();
  @@ -148,7 +155,7 @@
         } catch(IOException e) {
   	System.err.println("Error closing files.");
   	e.printStackTrace();
  -	System.exit(1);
  +	return;
         }
       }
     }
  
  
  
  1.11      +88 -48    jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java
  
  Index: Perl5Util.java
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- Perl5Util.java	8 Dec 2001 03:15:25 -0000	1.10
  +++ Perl5Util.java	19 Feb 2002 04:54:29 -0000	1.11
  @@ -58,11 +58,12 @@
    */
   
   /*
  - * $Id: Perl5Util.java,v 1.10 2001/12/08 03:15:25 dfs Exp $
  + * $Id: Perl5Util.java,v 1.11 2002/02/19 04:54:29 dfs Exp $
    */
   import java.util.*;
  -import org.apache.oro.text.regex.*;
  +
   import org.apache.oro.text.*;
  +import org.apache.oro.text.regex.*;
   import org.apache.oro.util.*;
   
   /**
  @@ -77,7 +78,7 @@
    * the slashes.
    *  <p>
    * The objective of the class is to minimize the amount of code a Java
  - * programmer using OROMatcher<font size="-2"><sup>TM</sup></font>
  + * programmer using Jakarta-ORO
    * has to write to achieve the same results as Perl by 
    * transparently handling regular expression compilation, caching, and
    * matching.  A second objective is to use the same Perl pattern matching
  @@ -86,7 +87,7 @@
    * All the state affecting methods are synchronized to avoid
    * the maintenance of explicit locks in multithreaded programs.  This
    * philosophy differs from the
  - * OROMatcher<font size="-2"><sup>TM</sup></font> package, where
  + * {@link org.apache.oro.text.regex} package, where
    * you are expected to either maintain explicit locks, or more preferably
    * create separate compiler and matcher instances for each thread.
    * <p>
  @@ -123,10 +124,12 @@
    * <p>
    * A couple of things to remember when using this class are that the
    * {@link #match match()} methods have the same meaning as
  - * contains() in OROMatcher<font size="-2"><sup>TM</sup></font>
  + * {@link org.apache.oro.text.regex.Perl5Matcher#contains
  + *  Perl5Matcher.contains()}
    * and <code>=~ m/pattern/</code> in Perl.  The methods are named match
    * to more closely associate them with Perl and to differentiate them
  - * from matches() in OROMatcher<font size="-2"><sup>TM</sup></font>.
  + * from {@link org.apache.oro.text.regex.Perl5Matcher#matches
  + * Perl5Matcher.matches()}.
    * A further thing to keep in mind is that the
    * {@link MalformedPerl5PatternException} class is derived from
    * RuntimeException which means you DON'T have to catch it.  The reasoning
  @@ -143,12 +146,11 @@
    * programs to be robust.
    * <p>
    * Finally, as a convenience Perl5Util implements 
  - * the org.apache.oro.text.regex.MatchResult interface found in the
  - * OROMatcher<font size="-2"><sup>TM</sup></font> package.  The methods
  - * are merely wrappers which call the corresponding method of the last
  - * MatchResult found (which can be accessed with
  - * {@link #getMatch()} by a match or substitution
  - * (or even a split, but this isn't particularly useful).
  + * the {@link org.apache.oro.text.regex.MatchResult MatchResult} interface.
  + * The methods are merely wrappers which call the corresponding method of
  + * the last {@link org.apache.oro.text.regex.MatchResult MatchResult}
  + * found (which can be accessed with {@link #getMatch()} by a match or
  + * substitution (or even a split, but this isn't particularly useful).
    *
    * @author <a href="mailto:oro-dev@jakarta.apache.org">Daniel F. Savarese</a>
    * @version @version@
  @@ -384,7 +386,8 @@
      * As with Perl, any non-alphanumeric character can be used in lieu of
      * the slashes.
      * <p>
  -   * If the input contains the pattern, the org.apache.oro.text.regex.MatchResult
  +   * If the input contains the pattern, the
  +   * {@link org.apache.oro.text.regex.MatchResult MatchResult}
      * can be obtained by calling {@link #getMatch()}.
      * However, Perl5Util implements the MatchResult interface as a wrapper
      * around the last MatchResult found, so you can call its methods to
  @@ -423,7 +426,8 @@
      * As with Perl, any non-alphanumeric character can be used in lieu of
      * the slashes.
      * <p>
  -   * If the input contains the pattern, the org.apache.oro.text.regex.MatchResult
  +   * If the input contains the pattern, the
  +   * {@link org.apache.oro.text.regex.MatchResult MatchResult}
      * can be obtained by calling {@link #getMatch()}.
      * However, Perl5Util implements the MatchResult interface as a wrapper
      * around the last MatchResult found, so you can call its methods to
  @@ -431,7 +435,8 @@
      * After the call to this method, the PatternMatcherInput current offset
      * is advanced to the end of the match, so you can use it to repeatedly
      * search for expressions in the entire input using a while loop as
  -   * explained in the OROMatcher<font size="-2"><sup>TM</sup></font> package.
  +   * explained in the {@link org.apache.oro.text.regex.PatternMatcherInput
  +   * PatternMatcherIinput} documentation.
      * <p>
      * @param pattern  The pattern to search for.
      * @param input    The PatternMatcherInput to search.
  @@ -489,13 +494,14 @@
      * <dt> m <dd> treat the input as consisting of multiple lines
      * <dt> o <dd> If variable interopolation is used, only evaluate the
      *             interpolation once (the first time).  This is equivalent
  -   *             to using a numInterpolations argument of 1 in the 
  -   *             OROMatcher<font size="-2"><sup>TM</sup></font>
  -   *             Util.substitute() method.  The default is to compute
  -   *             each interpolation independently.  See the
  -   *             OROMatcher<font size="-2"><sup>TM</sup></font>
  -   *             Util.substitute() method for more details on variable
  -   *             interpolation in substitutions.
  +   *             to using a numInterpolations argument of 1 in
  +   * {@link org.apache.oro.text.regex.Util#substitute Util.substitute()}.
  +   *             The default is to compute each interpolation independently.
  +   *             See
  +   * {@link org.apache.oro.text.regex.Util#substitute Util.substitute()}
  +   * and {@link org.apache.oro.text.regex.Perl5Substitution Perl5Substitution}
  +   *             for more details on variable interpolation in
  +   *             substitutions.
      * <dt> s <dd> treat the input as consisting of a single line
      * <dt> x <dd> enable extended expression syntax incorporating whitespace
      *             and comments
  @@ -504,11 +510,11 @@
      * the slashes.  This is helpful to avoid backslashing.  For example,
      * using slashes you would have to do:
      * <blockquote><pre>
  -   * result = util.substitute("s/foo\\/bar/goo\\/\\/baz/", input);
  +   * numSubs = util.substitute(result, "s/foo\\/bar/goo\\/\\/baz/", input);
      * </pre></blockquote>
      * when you could more easily write:
      * <blockquote><pre>
  -   * result = util.substitute("s#foo/bar#goo//baz#", input);
  +   * numSubs = util.substitute(result, "s#foo/bar#goo//baz#", input);
      * </pre></blockquote>
      * where the hashmarks are used instead of slashes.
      * <p>
  @@ -535,25 +541,28 @@
      * used to be an invalid expression and require a special-case extra
      * backslash, will now replace all instances of / with \:
      * <blockquote><pre>
  -   * result = util.substitute("s#/#\\#g", input);
  +   * numSubs = util.substitute(result, "s#/#\\#g", input);
      * </pre></blockquote>
      * <p>
  -   * @param expression The substitution expression.
  -   * @param input      The input.
  -   * @return           The input after substitutions have been performed.
  +   * @param result     The StringBuffer in which to store the result of the
  +   *                   substitutions. The buffer is only appended to.
  +   * @param expression The Perl5 substitution regular expression.
  +   * @param input      The input on which to perform substitutions.
  +   * @return The number of substitutions made.
      * @exception MalformedPerl5PatternException  If there is an error in
      *            the expression.  You are not forced to catch this exception
      *            because it is derived from RuntimeException.
  +   * @since 2.0.6
      */
     // Expression parsing will have to be moved into a separate method if
     // there are going to be variations of this method.
  -  public synchronized String substitute(String expression, String input)
  +  public synchronized int substitute(StringBuffer result, String expression,
  +				     String input)
          throws MalformedPerl5PatternException 
     {
       boolean backslash, finalDelimiter;
       int index, compileOptions, numSubstitutions, numInterpolations;
  -    int firstOffset, secondOffset, thirdOffset;
  -    String result;
  +    int firstOffset, secondOffset, thirdOffset, subCount;
       StringBuffer replacement;
       Pattern compiledPattern;
       char exp[], delimiter;
  @@ -575,12 +584,14 @@
   	break __nullTest;
         }
   
  -      result = Util.substitute(__matcher, entry._pattern, entry._substitution,
  -			       input, entry._numSubstitutions);
  +
  +      subCount =
  +	Util.substitute(result, __matcher, entry._pattern, entry._substitution,
  +			input, entry._numSubstitutions);
   
         __lastMatch = __matcher.getMatch();
   
  -      return result;
  +      return subCount;
       }
   
       exp = expression.toCharArray();
  @@ -680,15 +691,46 @@
   					numSubstitutions);
       __expressionCache.addElement(expression, entry);
   
  -    result = Util.substitute(__matcher, compiledPattern, substitution,
  -			     input, numSubstitutions);
  +    subCount =
  +      Util.substitute(result, __matcher, compiledPattern, substitution,
  +		      input, numSubstitutions);
   
       __lastMatch = __matcher.getMatch();
   
  -    return result;
  +    return subCount;
     }
   
  -
  +  /**
  +   * Substitutes a pattern in a given input with a replacement string.
  +   * The substitution expression is specified in Perl5 native format.
  +   * <dl compact>
  +   *   <dt>Calling this method is the same as:</dt>
  +   *   <dd>
  +   *     <blockquote><pre>
  +   *      String result;
  +   *      StringBuffer buffer = new StringBuffer();
  +   *      perl.substitute(buffer, expression, input);
  +   *      result = buffer.toString();
  +   *     </pre></blockquote>
  +   *   </ddD>
  +   * </dl>
  +   * @param expression The Perl5 substitution regular expression.
  +   * @param input      The input on which to perform substitutions.
  +   * @return  The input as a String after substitutions have been performed.
  +   * @exception MalformedPerl5PatternException  If there is an error in
  +   *            the expression.  You are not forced to catch this exception
  +   *            because it is derived from RuntimeException.
  +   * @since 1.0
  +   * @see #substitute
  +   */
  +  public synchronized String substitute(String expression, String input)
  +    throws MalformedPerl5PatternException
  +  {
  +    StringBuffer result = new StringBuffer();
  +    substitute(result, expression, input);
  +    return result.toString();
  +  }
  + 
     /**
      * Splits a String into strings that are appended to a List, but no more
      * than a specified limit.  The String is split using a regular expression
  @@ -724,9 +766,8 @@
      * <blockquote><pre>
      * { "8", "-", "12", ",", "15", ",", "18" }
      * </pre></blockquote>
  -   * The Util.split() method in the
  -   * OROMatcher<font size="-2"><sup>TM</sup></font> package does NOT
  -   * implement this particular behavior because it is intended to
  +   * The {@link org.apache.oro.text.regex.Util#split Util.split()} method
  +   * does NOT implement this particular behavior because it is intended to
      * be usable with Pattern instances other than Perl5Pattern.
      * <p>
      * @param results 
  @@ -848,9 +889,8 @@
      * <blockquote><pre>
      * { "8", "-", "12", ",", "15", ",", "18" }
      * </pre></blockquote>
  -   * The Util.split() method in the
  -   * OROMatcher<font size="-2"><sup>TM</sup></font> package does NOT
  -   * implement this particular behavior because it is intended to
  +   * The {@link org.apache.oro.text.regex.Util#split Util.split()} method
  +   * does NOT implement this particular behavior because it is intended to
      * be usable with Pattern instances other than Perl5Pattern.
      * <p>
      * @deprecated Use
  @@ -1023,7 +1063,7 @@
   
   
     /**
  -   * Returns the part of the input preceding that last match found.
  +   * Returns the part of the input preceding the last match found.
      * <p>
      * @return The part of the input following the last match found.
      */
  @@ -1067,7 +1107,7 @@
   
   
     /**
  -   * Returns the part of the input following that last match found.
  +   * Returns the part of the input following the last match found.
      * <p>
      * @return The part of the input following the last match found.
      */
  @@ -1110,7 +1150,7 @@
   
   
     /**
  -   * Returns the part of the input preceding that last match found as a
  +   * Returns the part of the input preceding the last match found as a
      * char array.  This method eliminates the extra
      * buffer copying caused by preMatch().toCharArray().
      * <p>
  @@ -1161,7 +1201,7 @@
   
   
     /**
  -   * Returns the part of the input following that last match found as a char
  +   * Returns the part of the input following the last match found as a char
      * array.  This method eliminates the extra buffer copying caused by
      * preMatch().toCharArray().
      * <p>
  
  
  
  1.10      +20 -20    jakarta-oro/src/java/org/apache/oro/text/regex/Util.java
  
  Index: Util.java
  ===================================================================
  RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Util.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- Util.java	17 Jan 2002 19:29:15 -0000	1.9
  +++ Util.java	19 Feb 2002 04:54:29 -0000	1.10
  @@ -58,7 +58,7 @@
    */
   
   /*
  - * $Id: Util.java,v 1.9 2002/01/17 19:29:15 dfs Exp $
  + * $Id: Util.java,v 1.10 2002/02/19 04:54:29 dfs Exp $
    */
   import java.util.*;
   
  @@ -151,7 +151,7 @@
      *         regular expression will be used to split the input.
      * @param matcher The regular expression matcher to execute the split.
      * @param pattern The regular expression to use as a split delimiter.
  -   * @param input  The <code>String</code> to split.
  +   * @param input   The <code>String</code> to split.
      * @param limit  The limit on the number of resulting split elements.
      *               Values <= 0 produce the same behavior as using the
      *               <b>SPLIT_ALL</b> constant which causes the limit to be 
  @@ -214,7 +214,7 @@
      *         occurences are appended.
      * @param matcher The regular expression matcher to execute the split.
      * @param pattern The regular expression to use as a split delimiter.
  -   * @param input  The <code>String</code> to split.
  +   * @param input   The <code>String</code> to split.
      * @since 2.0
      */
     public static void split(Collection results,  PatternMatcher matcher,
  @@ -309,7 +309,7 @@
      * {@link #split(Collection, PatternMatcher, Pattern, String)} instead.
      * @param matcher The regular expression matcher to execute the split.
      * @param pattern The regular expression to use as a split delimiter.
  -   * @param input  The <code>String</code> to split.
  +   * @param input   The <code>String</code> to split.
      * @return A <code>Vector</code> containing all the substrings of the input
      *         that occur between the regular expression delimiter occurences.
      * @since 1.0
  @@ -333,11 +333,11 @@
      * @param pattern The regular expression to search for and substitute
      *                occurrences of.
      * @param sub     The Substitution used to substitute pattern occurences.
  -   * @param input  The <code>String</code> on which to perform substitutions.
  -   * @param numSubs  The number of substitutions to perform.  Only the
  -   *                 first <b> numSubs </b> patterns encountered are
  -   *                 substituted.  If you want to substitute all occurences
  -   *                 set this parameter to <b> SUBSTITUTE_ALL </b>.
  +   * @param input   The <code>String</code> on which to perform substitutions.
  +   * @param numSubs The number of substitutions to perform.  Only the
  +   *                first <b> numSubs </b> patterns encountered are
  +   *                substituted.  If you want to substitute all occurences
  +   *                set this parameter to <b> SUBSTITUTE_ALL </b>.
      * @return A String comprising the input string with the substitutions,
      *         if any, made.  If no substitutions are made, the returned String
      *         is the original input String.
  @@ -372,7 +372,7 @@
      * @param pattern The regular expression to search for and substitute
      *                occurrences of.
      * @param sub     The Substitution used to substitute pattern occurences.
  -   * @param input  The <code>String</code> on which to perform substitutions.
  +   * @param input   The <code>String</code> on which to perform substitutions.
      * @return A String comprising the input string with the substitutions,
      *         if any, made.  If no substitutions are made, the returned String
      *         is the original input String.
  @@ -399,11 +399,11 @@
      * @param pattern The regular expression to search for and substitute
      *                occurrences of.
      * @param sub     The Substitution used to substitute pattern occurences.
  -   * @param input  The input on which to perform substitutions.
  -   * @param numSubs  The number of substitutions to perform.  Only the
  -   *                 first <b> numSubs </b> patterns encountered are
  -   *                 substituted.  If you want to substitute all occurences
  -   *                 set this parameter to <b> SUBSTITUTE_ALL </b>.
  +   * @param input   The input on which to perform substitutions.
  +   * @param numSubs The number of substitutions to perform.  Only the
  +   *                first <b> numSubs </b> patterns encountered are
  +   *                substituted.  If you want to substitute all occurences
  +   *                set this parameter to <b> SUBSTITUTE_ALL </b>.
      * @return The number of substitutions made.
      * @since 2.0.5
      */
  @@ -431,11 +431,11 @@
      * @param pattern The regular expression to search for and substitute
      *                occurrences of.
      * @param sub     The Substitution used to substitute pattern occurences.
  -   * @param input  The input on which to perform substitutions.
  -   * @param numSubs  The number of substitutions to perform.  Only the
  -   *                 first <b> numSubs </b> patterns encountered are
  -   *                 substituted.  If you want to substitute all occurences
  -   *                 set this parameter to <b> SUBSTITUTE_ALL </b>.
  +   * @param input   The input on which to perform substitutions.
  +   * @param numSubs The number of substitutions to perform.  Only the
  +   *                first <b> numSubs </b> patterns encountered are
  +   *                substituted.  If you want to substitute all occurences
  +   *                set this parameter to <b> SUBSTITUTE_ALL </b>.
      * @return The number of substitutions made.
      * @since 2.0.3
      */
  
  
  

--
To unsubscribe, e-mail:   <mailto:oro-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:oro-dev-help@jakarta.apache.org>


Mime
View raw message