jakarta-oro-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ravi" <r...@htinc.com>
Subject RE: Performance
Date Fri, 09 May 2003 08:07:35 GMT
I'm attaching the code that I used. I don't know why it takes so much
time. There must be something wrong with my regular expressions or
with my code. Can somebody please look at it and let me know what's
wrong? You can try any piece of text as input. I would really
appreciate it. Thanks in advance.

// Shared regex helpers; all three are assigned at the start of main().
// Compiles pattern strings into Pattern objects (used by getPattern).
private static Perl5Compiler compiler;
// Performs the matching in contains() and replaceText().
private static PatternMatcher matcher;
// Reused substitution object; replaceText() resets its replacement text on every call.
private static Perl5Substitution substitution;

public static void main(String args[])
{
compiler = new Perl5Compiler();
matcher = new Perl5Matcher();
substitution = new Perl5Substitution();
String text = args[0];
Pattern pattern = getPattern("\t");
text = replaceText(pattern," ",text);
pattern = getPattern("[\\[\\]\\{\\}\\^\\~?!()\";/\\|,<>`]");
text = replaceText(pattern," $& ",text);
pattern = getPattern("^('|&)");
text = replaceText(pattern,"$& ",text);
pattern = getPattern("([^A-Za-z0-9])('|&|@|%|\\*)");
text = replaceText(pattern,"$& ",text);
pattern =
getPattern("('|:|-|#|\\*|\\+|\\$|&|@|'S|'D|'M|'LL|'RE|'VE|N'T|'s|'d|'m|'
ll|'re|'ve|n't)$");
text = replaceText(pattern," $&",text);
pattern =
getPattern("('|:|-|#|\\*|\\+|\\$|&|@|'S|'D|'M|'LL|'RE|'VE|N'T|'s|'d|'m|'
ll|'re|'ve|n't)([^A-Za-z0-9])");
text = replaceText(pattern," $&",text);
StringTokenizer strTok = new StringTokenizer(text);
while(strTok.hasMoreTokens())
 {
  String token = strTok.nextToken();
  token = token.trim()
  pattern = getPattern("([A-Za-z0-9][.])$");
  if(contains(pattern,token))
  {
	pattern =
getPattern("^([A-Za-z]\\.([A-Za-z]\\.)+|[A-Za-z]\\.|[A-Z][bcdfghj-np-tvx
z]+\\.)$");
	if(contains(pattern,token))
	{
	   ///code to process token which does not use any regex stuff
	}
  }
  else
  {
	pattern = getPattern("^([A-Za-z0-9])");
	if(contains(pattern,token))
	{
 	 pattern =
getPattern("([A-Za-z0-9]+\\.[A-Za-z]+|[0-9]+\\.[A-Za-z])");
	   if(contains(pattern,token))
	   {
		////code to process token which does not use any regex
stuff
	   }
	   else
	   {
	      if(contains(getPattern("^([A-Za-z])"),token))
	      {
		   //code to process token which does not use any regex
stuff
	      }
	   }
	}
	else
	{
	   pattern = getPattern("([.!?])$");
	   if(contains(pattern,token))
	   {
		//code to process token which does not use any regex
stuff
	   }
	}
    }
 }
}
/**
 * Reports whether the shared matcher finds the given pattern anywhere
 * in the given string.
 *
 * @param pattern compiled pattern to look for
 * @param str     text to search
 * @return the result of {@code matcher.contains(str, pattern)}
 */
public static boolean contains(Pattern pattern, String str)
{
    final boolean found = matcher.contains(str, pattern);
    return found;
}
/**
 * Substitutes every match of a pattern in a string, using the shared
 * matcher and substitution objects.
 *
 * @param pattern     compiled pattern whose matches are replaced
 * @param replacement replacement text handed to the shared substitution
 * @param str         input text
 * @return the value returned by {@code Util.substitute} with
 *         {@code Util.SUBSTITUTE_ALL}
 */
public static String replaceText(Pattern pattern, String replacement, String str)
{
    // The substitution object is shared, so its replacement text is reset on each call.
    substitution.setSubstitution(replacement);
    final String result =
        Util.substitute(matcher, pattern, substitution, str, Util.SUBSTITUTE_ALL);
    return result;
}
/**
 * Compiles a regular expression with the shared compiler.
 *
 * The compiler reports an invalid expression with a checked exception;
 * it is converted to an unchecked one here so callers passing constant
 * expressions need no try/catch of their own.
 *
 * @param pattern the regular expression to compile
 * @return the compiled pattern
 * @throws IllegalArgumentException if the expression cannot be compiled
 */
public static Pattern getPattern(String pattern)
{
    try
    {
        return compiler.compile(pattern);
    }
    catch (org.apache.oro.text.regex.MalformedPatternException e)
    {
        throw new IllegalArgumentException(
            "Malformed regular expression: " + pattern, e);
    }
}

Thanks,
Ravi.


---------------------------------------------------------------------
To unsubscribe, e-mail: oro-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: oro-user-help@jakarta.apache.org


Mime
View raw message