jakarta-regexp-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vgritse...@apache.org
Subject svn commit: r517946 - in /jakarta/regexp/trunk/src/java/org/apache/regexp: RE.java RETest.java
Date Wed, 14 Mar 2007 00:22:28 GMT
Author: vgritsenko
Date: Tue Mar 13 17:22:27 2007
New Revision: 517946

URL: http://svn.apache.org/viewvc?view=rev&rev=517946
Log:
optimize inner loop

Modified:
    jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517946&r1=517945&r2=517946
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Tue Mar 13 17:22:27 2007
@@ -818,9 +818,11 @@
             switch (opcode)
             {
                 case OP_MAYBE:
+                case OP_STAR:
                     {
-                        // Try to match the following subexpr.
-                        // If it succeeds, it will continue matching by itself without returning here.
+                        // Try to match the following subexpr. If it matches:
+                        //   MAYBE:  Continues matching rest of the expression
+                        //    STAR:  Points back here to repeat subexpr matching
                         if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
                         {
                             return idxNew;
@@ -832,7 +834,7 @@
 
                 case OP_PLUS:
                     {
-                        // Try to match the following subexpr again (and again (and ...
+                        // Try to match the subexpr again (and again (and ...
                         if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                         {
                             return idxNew;
@@ -840,24 +842,13 @@
 
                         // If failed, just continue with the rest of expression
                         // Rest is located at the next pointer of the next instruction
-                        // which must be OP_CONTINUE.
-                        node = next + instruction[next + offsetNext];
+                        // (which must be OP_CONTINUE)
+                        node = next + (short) instruction[next + offsetNext];
                         continue;
                     }
 
-                case OP_STAR:
-                    {
-                        // Try to match the following subexpr (and again (and again (and ...
-                        if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
-                        {
-                            return idxNew;
-                        }
-
-                        // If failed, just continue with the rest of expression
-                        break;
-                    }
-
                 case OP_RELUCTANTMAYBE:
+                case OP_RELUCTANTSTAR:
                     {
                         // Try to match the rest without using the reluctant subexpr
                         if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
@@ -865,14 +856,16 @@
                             return idxNew;
                         }
 
-                        // Try reluctant subexpr, which continues to the rest of the expression
+                        // Try reluctant subexpr. If it matches:
+                        //   RELUCTANTMAYBE: Continues matching rest of the expression
+                        //    RELUCTANTSTAR: Points back here to repeat reluctant star matching
                         return matchNodes(node + nodeSize, next, idx);
                     }
 
                 case OP_RELUCTANTPLUS:
                     {
                         // Continue matching the rest without using the reluctant subexpr
-                        if ((idxNew = matchNodes(next + instruction[next + offsetNext], maxNode, idx)) != -1)
+                        if ((idxNew = matchNodes(next + (short) instruction[next + offsetNext], maxNode, idx)) != -1)
                         {
                             return idxNew;
                         }
@@ -881,17 +874,6 @@
                         break;
                     }
 
-                case OP_RELUCTANTSTAR:
-                    {
-                        // Try to match the rest without using the reluctant subexpr
-                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1) {
-                            return idxNew;
-                        }
-
-                        // Try reluctant subexpr
-                        return matchNodes(node + nodeSize, next, idx);
-                    }
-
                 case OP_OPEN:
 
                     // Match subexpression
@@ -902,7 +884,7 @@
                     if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                     {
                         // Increase valid paren count
-                        if ((opdata + 1) > parenCount)
+                        if (opdata >= parenCount)
                         {
                             parenCount = opdata + 1;
                         }
@@ -925,7 +907,7 @@
                     if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                     {
                         // Increase valid paren count
-                        if ((opdata + 1) > parenCount)
+                        if (opdata  >= parenCount)
                         {
                             parenCount = opdata + 1;
                         }
@@ -938,11 +920,6 @@
                     }
                     return idxNew;
 
-                case OP_OPEN_CLUSTER:
-                case OP_CLOSE_CLUSTER:
-                    // starting or ending the matching of a subexpression which has no backref.
-                    return matchNodes(next, maxNode, idx);
-
                 case OP_BACKREF:
                     {
                         // Get the start and end of the backref
@@ -992,10 +969,9 @@
                         // If we're multiline matching, we could still be at the start of a line
                         if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
                         {
-                            // If not at start of line, give up
-                            if (idx <= 0 || !isNewline(idx - 1)) {
-                                return -1;
-                            } else {
+                            // Continue if at the start of a line
+                            if (isNewline(idx - 1))
+                            {
                                 break;
                             }
                         }
@@ -1011,10 +987,9 @@
                         // If we're multi-line matching
                         if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
                         {
-                            // Give up if we're not at the end of a line
-                            if (!isNewline(idx)) {
-                                return -1;
-                            } else {
+                            // Continue if we're at the end of a line
+                            if (isNewline(idx))
+                            {
                                 break;
                             }
                         }
@@ -1321,6 +1296,7 @@
                 case OP_BRANCH:
                 {
                     // Check for choices
+                    // FIXME Dead code - only reason to keep is backward compat with pre-compiled exprs. Remove?
                     if (instruction[next /* + offsetOpcode */] != OP_BRANCH)
                     {
                         // If there aren't any other choices, just evaluate this branch.
@@ -1347,6 +1323,10 @@
                     // Failed to match any branch!
                     return -1;
                 }
+
+                case OP_OPEN_CLUSTER:
+                case OP_CLOSE_CLUSTER:
+                    // starting or ending the matching of a subexpression which has no backref.
 
                 case OP_NOTHING:
                 case OP_GOTO:

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=517946&r1=517945&r2=517946
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Tue Mar 13 17:22:27 2007
@@ -423,8 +423,10 @@
 
         // Bug 38331: Large program
         try {
-            new RE("a{8192}");
-            fail("a{8192} should fail to compile.");
+            REDebugCompiler c = new REDebugCompiler();
+            c.compile("(a{8192})?");
+            fail("(a{8192})? should fail to compile.");
+            c.dumpProgram();
         } catch (RESyntaxException e) {
             // expected
         }



---------------------------------------------------------------------
To unsubscribe, e-mail: regexp-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: regexp-dev-help@jakarta.apache.org


Mime
View raw message