jakarta-regexp-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vgritse...@apache.org
Subject svn commit: r515865 - in /jakarta/regexp/trunk: docs/ src/java/org/apache/regexp/ xdocs/
Date Thu, 08 Mar 2007 00:28:22 GMT
Author: vgritsenko
Date: Wed Mar  7 16:28:21 2007
New Revision: 515865

URL: http://svn.apache.org/viewvc?view=rev&rev=515865
Log:
Fix bug #38331: RE compiler creates incorrect program if pattern results in large program
with offsets exceeding capacity of the short

Modified:
    jakarta/regexp/trunk/docs/changes.html
    jakarta/regexp/trunk/docs/jakarta-regexp.jar
    jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
    jakarta/regexp/trunk/xdocs/changes.xml

Modified: jakarta/regexp/trunk/docs/changes.html
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Wed Mar  7 16:28:21 2007
@@ -92,6 +92,10 @@
 <h3>Version 1.5-dev</h3>
 <ul>
 <li>Fixed Bug
+    <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
+    RE compiler creates incorrect program if pattern results in large program
+    with offsets exceeding capacity of the short (VG)</li>
+<li>Fixed Bug
     <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=37275">37275</a>:
     RE incorrectly processes nested {n,m} closures (ex: (A{1}){0,2}) (VG)</li>
 <li>Added accessor for REProgram.prefix (VG)</li>

Modified: jakarta/regexp/trunk/docs/jakarta-regexp.jar
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/jakarta-regexp.jar?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
Binary files - no diff available.

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Wed Mar  7 16:28:21 2007
@@ -812,7 +812,7 @@
         for (int node = firstNode; node < lastNode; )
         {
             opcode = instruction[node + offsetOpcode];
-            next   = node + (short)instruction[node + offsetNext];
+            next   = node + (short) instruction[node + offsetNext];
             opdata = instruction[node + offsetOpdata];
 
             switch (opcode)
@@ -1292,7 +1292,7 @@
                     }
 
                     // Try all available branches
-                    short nextBranch;
+                    int nextBranch;
                     do
                     {
                         // Try matching the branch against the string
@@ -1302,7 +1302,7 @@
                         }
 
                         // Go to next branch (if any)
-                        nextBranch = (short)instruction[node + offsetNext];
+                        nextBranch = (short) instruction[node + offsetNext];
                         node += nextBranch;
                     }
                     while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH));

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Wed Mar  7 16:28:21 2007
@@ -148,7 +148,7 @@
         // Move everything from insertAt to the end down nodeSize elements
         System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt);
         instruction[insertAt + RE.offsetOpcode] = opcode;
-        instruction[insertAt + RE.offsetOpdata] = (char)opdata;
+        instruction[insertAt + RE.offsetOpdata] = (char) opdata;
         instruction[insertAt + RE.offsetNext] = 0;
         lenInstruction += RE.nodeSize;
     }
@@ -169,20 +169,29 @@
             // if the node we are supposed to point to is in the chain then
             // point to the end of the program instead.
             // Michael McCallum <gholam@xtra.co.nz>
-            // FIXME: // This is a _hack_ to stop infinite programs.
+            // FIXME: This is a _hack_ to stop infinite programs.
             // I believe that the implementation of the reluctant matches is wrong but
             // have not worked out a better way yet.
             if ( node == pointTo ) {
-              pointTo = lenInstruction;
+                pointTo = lenInstruction;
             }
             node += next;
             next = instruction[node + RE.offsetNext];
         }
+
         // if we have reached the end of the program then dont set the pointTo.
         // im not sure if this will break any thing but passes all the tests.
         if ( node < lenInstruction ) {
+            // Some patterns result in very large programs which exceed
+            // capacity of the short used for specifying signed offset of the
+            // next instruction. Example: a{1638} 
+            int offset = pointTo - node;
+            if (offset != (short) offset) {
+                throw new RESyntaxException("Exceeded short jump range.");
+            }
+
             // Point the last node in the chain to pointTo.
-            instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
+            instruction[node + RE.offsetNext] = (char) (short) offset;
         }
     }
 

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Wed Mar  7 16:28:21 2007
@@ -150,9 +150,9 @@
         for (int i = 0; i < lenInstruction; )
         {
             // Get opcode, opdata and next fields of current program node
-            char opcode =        instruction[i + RE.offsetOpcode];
-            char opdata =        instruction[i + RE.offsetOpdata];
-            short next  = (short)instruction[i + RE.offsetNext];
+            char opcode =         instruction[i + RE.offsetOpcode];
+            char opdata =         instruction[i + RE.offsetOpdata];
+            int  next   = (short) instruction[i + RE.offsetNext];
 
             // Display the current program node
             p.print(i + ". " + nodeToString(i) + ", next = ");

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java Wed Mar  7 16:28:21 2007
@@ -117,7 +117,7 @@
             if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
             {
                 // to the end node
-                char next = instruction[0 + RE.offsetNext];
+                int next = (short) instruction[0 + RE.offsetNext];
                 if (instruction[next + RE.offsetOpcode] == RE.OP_END && lenInstruction >= (RE.nodeSize * 2))
                 {
                     final char nextOp = instruction[RE.nodeSize + RE.offsetOpcode];

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Wed Mar  7 16:28:21 2007
@@ -425,6 +425,14 @@
         if (r.match("a\u2029b")) {
             fail("\"a\\u2029b\" matches \"^a.*b$\"");
         }
+
+        // Bug 38331: Large program
+        try {
+            new RE("a{8192}");
+            fail("a{8192} should fail to compile.");
+        } catch (RESyntaxException e) {
+            // expected
+        }
     }
 
     private void testPrecompiledRE()
@@ -631,7 +639,7 @@
         final String matchAgainst = br.readLine();
         final boolean badPattern = "ERR".equals(matchAgainst);
         boolean shouldMatch = false;
-        int expectedParenCount = 0;
+        int expectedParenCount;
         String[] expectedParens = null;
 
         if (!badPattern) {

Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Wed Mar  7 16:28:21 2007
@@ -35,6 +35,10 @@
 <h3>Version 1.5-dev</h3>
 <ul>
 <li>Fixed Bug
+    <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
+    RE compiler creates incorrect program if pattern results in large program
+    with offsets exceeding capacity of the short (VG)</li>
+<li>Fixed Bug
     <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=37275">37275</a>:
     RE incorrectly processes nested {n,m} closures (ex: (A{1}){0,2}) (VG)</li>
 <li>Added accessor for REProgram.prefix (VG)</li>



---------------------------------------------------------------------
To unsubscribe, e-mail: regexp-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: regexp-dev-help@jakarta.apache.org


Mime
View raw message