commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mt...@apache.org
Subject svn commit: r807635 [1/2] - in /commons/sandbox/runtime/trunk/src/main/native: ./ include/ shared/ srclib/regex/ test/
Date Tue, 25 Aug 2009 13:57:08 GMT
Author: mturk
Date: Tue Aug 25 13:57:05 2009
New Revision: 807635

URL: http://svn.apache.org/viewvc?rev=807635&view=rev
Log:
Add Henry Spencer's regex prefixed with acr_

Added:
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cclass.h   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cname.h   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/engine.c   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/re_comp.h   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/regcomp.c   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/regerror.c   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/regex.h   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/regex2.h   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/regexec.c   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/regfree.c   (with props)
    commons/sandbox/runtime/trunk/src/main/native/srclib/regex/utils.h   (with props)
Modified:
    commons/sandbox/runtime/trunk/src/main/native/Makefile.in
    commons/sandbox/runtime/trunk/src/main/native/Makefile.msc.in
    commons/sandbox/runtime/trunk/src/main/native/include/acr_sbuf.h
    commons/sandbox/runtime/trunk/src/main/native/shared/ini.c
    commons/sandbox/runtime/trunk/src/main/native/shared/sbuf.c
    commons/sandbox/runtime/trunk/src/main/native/test/sample.conf
    commons/sandbox/runtime/trunk/src/main/native/test/sample.ini

Modified: commons/sandbox/runtime/trunk/src/main/native/Makefile.in
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/Makefile.in?rev=807635&r1=807634&r2=807635&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/Makefile.in (original)
+++ commons/sandbox/runtime/trunk/src/main/native/Makefile.in Tue Aug 25 13:57:05 2009
@@ -73,6 +73,7 @@
 	$(SRCDIR)/os/unix \
 	$(SRCDIR)/os/win32 \
 	$(SRCDIR)/modules/network/ssl \
+	$(SRCDIR)/srclib/regex \
 	$(SRCDIR)/srclib/zlib \
 	$(SRCDIR)/test
 
@@ -206,6 +207,12 @@
 	$(SRCDIR)/port/strlcpy.$(OBJ) \
 	$(SRCDIR)/port/strsignal.$(OBJ)
 
+# Objects built from the bundled regex sources under srclib/regex.
+# engine.c is not listed: per its own header comment it is #included by
+# regexec.c rather than compiled on its own.
+REGEX_OBJS=\
+	$(SRCDIR)/srclib/regex/regcomp.$(OBJ) \
+	$(SRCDIR)/srclib/regex/regerror.$(OBJ) \
+	$(SRCDIR)/srclib/regex/regexec.$(OBJ) \
+	$(SRCDIR)/srclib/regex/regfree.$(OBJ)
+
 ZLIB_OBJS=\
 	$(SRCDIR)/srclib/zlib/adler32.$(OBJ) \
 	$(SRCDIR)/srclib/zlib/compress.$(OBJ) \
@@ -236,12 +243,12 @@
 .cpp.$(OBJ):
 	$(CXX) $(CFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(INCLUDES) -c -o $@ $<
 
-$(STATICLIB): $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(ZLIB_OBJS) @testobjs@
-	$(AR) $(ARFLAGS) $@ $(COMMON_OBJS) $(@platform@_OBJS) $(ZLIB_OBJS) @testobjs@
+# Static archive of the common, platform, regex and zlib objects.
+# NOTE(review): $(PPORT_OBJS) is a prerequisite but is not passed to $(AR),
+# while the $(SHAREDLIB) link line does include it -- confirm this is intended.
+$(STATICLIB): $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(REGEX_OBJS) $(ZLIB_OBJS) @testobjs@
+	$(AR) $(ARFLAGS) $@ $(COMMON_OBJS) $(@platform@_OBJS) $(REGEX_OBJS) $(ZLIB_OBJS) @testobjs@
 	$(RANLIB) $@
 
-$(SHAREDLIB): $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(ZLIB_OBJS) @testobjs@ $(STATICLIB)
-	$(CC) $(SHFLAGS) $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(ZLIB_OBJS) @testobjs@ $(LDFLAGS) -o $@
+# Shared library linked directly from the object files (now including the
+# regex objects). $(STATICLIB) is only a prerequisite here; it is not
+# passed to the link command.
+$(SHAREDLIB): $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(REGEX_OBJS) $(ZLIB_OBJS) @testobjs@ $(STATICLIB)
+	$(CC) $(SHFLAGS) $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(REGEX_OBJS) $(ZLIB_OBJS) @testobjs@ $(LDFLAGS) -o $@
 
 $(SSLMODLIB): $(SHAREDLIB) $(OPENSSL_OBJS)
 	$(CC) $(EXLFLAGS) $(SHFLAGS) $(OPENSSL_OBJS) $(LDFLAGS) $(SSLFLAGS) -L. -l$(NAME) -o $@

Modified: commons/sandbox/runtime/trunk/src/main/native/Makefile.msc.in
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/Makefile.msc.in?rev=807635&r1=807634&r2=807635&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/Makefile.msc.in (original)
+++ commons/sandbox/runtime/trunk/src/main/native/Makefile.msc.in Tue Aug 25 13:57:05 2009
@@ -128,6 +128,12 @@
 	$(SRCDIR)/port/strlcpy.$(OBJ) \
 	$(SRCDIR)/port/strsignal.$(OBJ)
 
+# Objects built from the bundled regex sources under srclib/regex.
+# engine.c is not listed: per its own header comment it is #included by
+# regexec.c rather than compiled on its own.
+REGEX_OBJS=\
+	$(SRCDIR)/srclib/regex/regcomp.$(OBJ) \
+	$(SRCDIR)/srclib/regex/regerror.$(OBJ) \
+	$(SRCDIR)/srclib/regex/regexec.$(OBJ) \
+	$(SRCDIR)/srclib/regex/regfree.$(OBJ)
+
 ZLIB_OBJS=\
 	$(SRCDIR)/srclib/zlib/adler32.$(OBJ) \
 	$(SRCDIR)/srclib/zlib/compress.$(OBJ) \
@@ -159,6 +165,9 @@
 {$(SRCDIR)\os\win32}.c{$(SRCDIR)\os\win32}.$(OBJ):
 	$(CC) $(CFLAGS) $(CPPFLAGS) -DACR_DECLARE_EXPORT $(INCLUDES) -c -Fo$@ -Fd$(LIBNAME)-src $<
 
+# Inference rule: compile each .c file in srclib\regex into an object file
+# in the same directory, using the same command line as the other
+# library source directories (ACR_DECLARE_EXPORT defined).
+{$(SRCDIR)\srclib\regex}.c{$(SRCDIR)\srclib\regex}.$(OBJ):
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DACR_DECLARE_EXPORT $(INCLUDES) -c -Fo$@ -Fd$(LIBNAME)-src $<
+
 {$(SRCDIR)\srclib\zlib}.c{$(SRCDIR)\srclib\zlib}.$(OBJ):
 	$(CC) $(CFLAGS) $(CPPFLAGS) -DACR_DECLARE_EXPORT $(INCLUDES) -c -Fo$@ -Fd$(LIBNAME)-src $<
 
@@ -171,10 +180,10 @@
 {$(SRCDIR)\modules\network\ssl}.c{$(SRCDIR)\modules\network\ssl}.$(OBJ):
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) -c -Fo$@ -Fd$(SSLNAME)-src $<
 
-$(SHAREDLIB): $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(ZLIB_OBJS) @testobjs@
+$(SHAREDLIB): $(PPORT_OBJS) $(COMMON_OBJS) $(@platform@_OBJS) $(REGEX_OBJS) $(ZLIB_OBJS) @testobjs@
 	$(RC) /l 0x409 /d "NDEBUG" /i "$(SRCDIR)\include" /fo $@.res $(SRCDIR)/os/win32/main.rc
 	$(LINK) $(SHFLAGS) $(LDFLAGS) /DLL /SUBSYSTEM:WINDOWS /pdb:$(LIBNAME).pdb /out:$@ @<<
-	$(PPORT_OBJS) $(COMMON_OBJS) $(WINDOWS_OBJS) $(ZLIB_OBJS) @testobjs@ $@.res
+	$(PPORT_OBJS) $(COMMON_OBJS) $(WINDOWS_OBJS) $(REGEX_OBJS) $(ZLIB_OBJS) @testobjs@ $@.res
 <<
 	IF EXIST $@.manifest \
 		mt -nologo -manifest $@.manifest -outputresource:$@;2
@@ -207,6 +216,7 @@
 	-@del /Q $(SRCDIR)\os\win32\*.$(OBJ) 2>NUL
 	-@del /Q $(SRCDIR)\os\win32\*.res 2>NUL
 	-@del /Q $(SRCDIR)\modules\network\ssl\*.$(OBJ) 2>NUL
+	-@del /Q $(SRCDIR)\srclib\regex\*.$(OBJ) 2>NUL
 	-@del /Q $(SRCDIR)\srclib\zlib\*.$(OBJ) 2>NUL
 	-@del /Q *.dll  2>NUL
 	-@del /Q *.lib  2>NUL

Modified: commons/sandbox/runtime/trunk/src/main/native/include/acr_sbuf.h
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/include/acr_sbuf.h?rev=807635&r1=807634&r2=807635&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/include/acr_sbuf.h (original)
+++ commons/sandbox/runtime/trunk/src/main/native/include/acr_sbuf.h Tue Aug 25 13:57:05 2009
@@ -65,6 +65,13 @@
 
 typedef struct acr_sbuf_t acr_sbuf_t;
 
+/**
+ * String buffer state.
+ * The complete definition lives in this header, so code that includes it
+ * can declare an acr_sbuf_t object by value instead of only through a
+ * pointer.
+ */
+struct acr_sbuf_t {
+    char            *s_buf;     /* storage buffer */
+    size_t           s_size;    /* size of storage buffer */
+    size_t           s_len;     /* current length of string */
+    int              s_flags;   /* flags */
+};
+
 /**
  * Sbuf API functions
  */

Modified: commons/sandbox/runtime/trunk/src/main/native/shared/ini.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/ini.c?rev=807635&r1=807634&r2=807635&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/shared/ini.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/shared/ini.c Tue Aug 25 13:57:05 2009
@@ -25,6 +25,8 @@
 #include "acr_error.h"
 #include "acr_memory.h"
 #include "acr_string.h"
+#include "acr_env.h"
+#include "acr_sbuf.h"
 #include "acr_tables.h"
 #include "acr_ini.h"
 
@@ -280,7 +282,7 @@
     ACR_RING_ELEM_INIT(ini->cur, link);
     ACR_RING_INIT(&ini->cur->attr_ring, acr_ini_attr_t, link);
     ACR_RING_INIT(&ini->cur->node_ring, acr_ini_node_t, link);
-    
+
     return ini;
 }
 
@@ -342,7 +344,7 @@
 /**
  * Remove all leading and trailing space characters
  * from the string.
- * @return pointer to the first non space character. 
+ * @return pointer to the first non space character.
  * @note String is modified in place
  */
 static char *strtrim(char *s)
@@ -413,6 +415,34 @@
     return NULL;
 }
 
+/**
+ * Like strpbrk(), but a matching character is skipped when the character
+ * immediately before it is a single quote. A quote that is itself
+ * preceded by a backslash (\') does not protect the character after it.
+ *
+ * @param s1 String to scan. May be NULL.
+ * @param s2 Set of characters to search for.
+ * @return Pointer to the first unprotected character of s1 that is in s2,
+ *         or NULL if s1 is NULL or empty, or no such character exists.
+ */
+static char *strpbrk_s(const char *s1, const char *s2)
+{
+    const char *scanp;
+    int c, sc, pc;
+
+    /* Validate s1 before anything reads through it. The previous
+     * character marker used to be seeded from *s1 in its declaration,
+     * which dereferenced a NULL s1 ahead of this check.
+     */
+    if (!s1 || !*s1)
+        return NULL;
+    /* The marker starts out as the first character itself, so a leading
+     * single quote counts as protecting itself and is never reported.
+     */
+    pc = *s1;
+    while ((c = *s1++) != 0) {
+        for (scanp = s2; (sc = *scanp++) != 0;) {
+            if (sc == c && pc != '\'')
+                return (char *)(s1 - 1);
+        }
+        /* Don't update the previous marker if it was \' already
+         * In that case we have escaped single quote.
+         */
+        if (pc == '\\' && c == '\'')
+            pc = 0;
+        else
+            pc = c;
+    }
+    return NULL;
+}
+
 static char *strunesc(char *s)
 {
     char *saved = s;
@@ -433,12 +463,102 @@
     return saved;
 }
 
-static char *expand_envars(char *s)
+/**
+ * Expand $(NAME) and ${NAME} references inside an attribute value.
+ *
+ * A reference is recognised only when the first space, '}' or ')' after
+ * the opening bracket is the bracket's matching closer. A '$' that
+ * strpbrk_s() treats as protected by a single quote is left alone.
+ * For each reference, NAME is looked up first among the attributes of
+ * ini->root (only when ini is not NULL) and then through ACR_EnvGet().
+ * A reference that cannot be resolved is copied to the result unchanged.
+ *
+ * @param str Value to expand. It is modified in place while scanning.
+ * @param ini Optional ini object whose root attributes act as variables.
+ * @return str itself when it is NULL, empty or has no unprotected '$';
+ *         otherwise str is freed and the sbuf data, passed through
+ *         strunesc(), is returned.
+ */
+static char *ini_expand(char *str, acr_ini_t *ini)
 {
-    /* TODO: Implement ${ENV} expansion.
-     * This should go in env.c
-     */
-    return s;
+    char        *var_pos;
+    char        *var_end;
+    char        *var_ptr;
+    char        *var_rep;
+    acr_sbuf_t   sbuf;
+
+    var_ptr = str;
+
+    if (!var_ptr || !*var_ptr)
+        return str; /* Guard against zero input */
+    var_pos = strpbrk_s(var_ptr, "$");
+    if (!var_pos) {
+        /* Nothing to replace.
+         * NOTE(review): strpbrk_s() appears to report a '$' that
+         * directly follows \' so the strstr() match below looks
+         * unreachable from here -- confirm.
+         */
+        if ((var_pos = strstr(str, "\\'$"))) {
+            /* Check if we have \'${FOO}' in which case
+             * unescape the string.
+             */
+            if (*(var_pos + 3) == '{' || *(var_pos + 3) == '(')
+                return strunesc(str);
+            else
+                return str;
+        }
+        else
+            return str;
+    }
+    /* NOTE(review): the result of acr_sbuf_new() is not checked. */
+    acr_sbuf_new(&sbuf, NULL, strlen(str), ACR_SBUF_AUTOEXTEND);
+    /* Loop for each unescaped $ */
+    while (var_pos) {
+        int wch = 0;
+        var_end = NULL;
+        var_rep = NULL;
+        /* wch is the closer that must terminate the name */
+        if (*(var_pos + 1) == '(') {
+            var_end = strpbrk(var_pos + 1, " })");
+            wch = ')';
+        }
+        else if (*(var_pos + 1) == '{') {
+            var_end = strpbrk(var_pos + 1, " })");
+            wch = '}';
+        }
+        if (var_end && *var_end == wch) {
+            /* Overwrite '$', the opening bracket and the closer with
+             * NUL so the prefix and the name become separate strings.
+             */
+            *var_pos++ = '\0';
+            *var_pos++ = '\0';
+            *var_end++ = '\0';
+            /* Add the string before $ */
+            acr_sbuf_cat(&sbuf, var_ptr);
+            /* var_pos holds the variable name */
+            if (ini) {
+                acr_ini_attr_t *ap;
+                acr_ini_node_t *np = &ini->root;
+                ACR_RING_FOREACH(ap, &np->attr_ring, acr_ini_attr_t, link) {
+                    /* Match the Variable with Attribute key.
+                     */
+                    if (*ap->key && strcmp(ap->key, var_pos) == 0) {
+                        if (!ap->val)
+                            var_rep = "";
+                        else
+                            var_rep = ap->val;
+                        break;
+                    }
+                }
+            }
+            /* No attribute matched; presumably this reads the process
+             * environment -- confirm in the env module.
+             */
+            if (var_rep == NULL)
+                var_rep = ACR_EnvGet(var_pos);
+            if (var_rep) {
+                acr_sbuf_cat(&sbuf, var_rep);
+            }
+            else {
+                /* Unknown name: write the reference back as it was */
+                acr_sbuf_putc(&sbuf, '$');
+                if (wch == '}')
+                    acr_sbuf_putc(&sbuf, '{');
+                else
+                    acr_sbuf_putc(&sbuf, '(');
+                acr_sbuf_cat(&sbuf, var_pos);
+                acr_sbuf_putc(&sbuf, wch);
+            }
+            var_ptr = var_end;
+            var_pos = strpbrk_s(var_ptr, "$");
+        }
+        else {
+            /* Not a well formed reference: copy the text up to and
+             * including the '$' and keep scanning after it.
+             */
+            *var_pos++ = '\0';
+            acr_sbuf_cat(&sbuf, var_ptr);
+            acr_sbuf_putc(&sbuf, '$');
+            var_ptr = var_pos;
+            var_pos = strpbrk_s(var_ptr, "$");
+        }
+    }
+    /* Add what's left from the original string */
+    acr_sbuf_cat(&sbuf, var_ptr);
+    acr_sbuf_finish(&sbuf);
+    free(str);
+
+    str = acr_sbuf_data(&sbuf);
+    return strunesc(str);
 }
 
 /*
@@ -462,7 +582,7 @@
         ACR_THROW_IO_ERRNO();
         return NULL;
     }
-    ini = ACR_IniNew(_E);    
+    ini = ACR_IniNew(_E);
     cur = ACR_IniNodeRoot(ini);
     ACR_IniNodeSetAttr(_E, cur, fname);
     while (fgets(buffer, ACR_PBUFF_SIZ, fp)) {
@@ -490,7 +610,7 @@
             }
             ACR_IniAttrAddVal(_E, attr, line);
             if (!nextline) {
-                attr->val = expand_envars(attr->val);
+                attr->val = ini_expand(attr->val, NULL);
             }
             continue;
         }
@@ -539,7 +659,7 @@
             }
             attr = ACR_IniNodeAttrAdd(_E, cur, line, val);
             if (attr->val && !nextline) {
-                attr->val = expand_envars(attr->val);
+                attr->val = ini_expand(attr->val, NULL);
             }
         }
     }
@@ -594,7 +714,7 @@
             }
             ACR_IniAttrAddVal(_E, attr, line);
             if (!nextline) {
-                attr->val = expand_envars(attr->val);
+                attr->val = ini_expand(attr->val, NULL);
             }
             continue;
         }
@@ -619,7 +739,7 @@
             nextline = 0;
             continue;
         }
-        key = strunesc(line); 
+        key = strunesc(line);
         if (allowdups) {
             attr = ACR_IniNodeAttrAdd(_E, node, key, val);
         }
@@ -627,7 +747,7 @@
             attr = ACR_IniNodeAttrSet(_E, node, key, val);
         }
         if (attr->val && !nextline) {
-            attr->val = expand_envars(attr->val);
+            attr->val = ini_expand(attr->val, NULL);
         }
     }
     fclose(fp);
@@ -675,7 +795,7 @@
             }
             ACR_IniAttrAddVal(_E, attr, line);
             if (!nextline) {
-                attr->val = expand_envars(attr->val);
+                attr->val = ini_expand(attr->val, ini);
             }
             continue;
         }
@@ -745,7 +865,7 @@
             }
             attr = ACR_IniNodeAttrAdd(_E, cur, key, val);
             if (attr->val && !nextline) {
-                attr->val = expand_envars(attr->val);
+                attr->val = ini_expand(attr->val, ini);
             }
         }
     }

Modified: commons/sandbox/runtime/trunk/src/main/native/shared/sbuf.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/sbuf.c?rev=807635&r1=807634&r2=807635&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/shared/sbuf.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/shared/sbuf.c Tue Aug 25 13:57:05 2009
@@ -50,13 +50,6 @@
 /**
  * Structure definitions
  */
-struct acr_sbuf_t {
-    char            *s_buf;     /* storage buffer */
-    size_t           s_size;    /* size of storage buffer */
-    size_t           s_len;     /* current length of string */
-    int              s_flags;   /* flags */
-};
-
 #define KASSERT(e, m)
 
 /*

Added: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cclass.h
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cclass.h?rev=807635&view=auto
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cclass.h (added)
+++ commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cclass.h Tue Aug 25 13:57:05 2009
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cclass.h	8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * character-class table
+ *
+ * Maps a character class name to the characters that belong to it.
+ * The list ends with a sentinel entry whose name is NULL.
+ */
+static struct cclass {
+	char *name;	/* class name, e.g. "alnum" */
+	char *chars;	/* every single character in the class */
+	char *multis;	/* empty for all entries here; presumably multi-character members -- TODO confirm */
+} cclasses[] = {
+    { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ""} ,
+    { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", ""} ,
+    { "blank", " \t", ""} ,
+    { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
+    { "digit", "0123456789", ""} ,
+    { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ""} ,
+    { "lower", "abcdefghijklmnopqrstuvwxyz", ""} ,
+    { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", ""} ,
+    { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ""} ,
+    { "space", "\t\n\v\f\r ", ""} ,
+    { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", ""} ,
+    { "xdigit", "0123456789ABCDEFabcdef", ""} ,
+    { NULL, 0, "" }
+};
+

Propchange: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cclass.h
------------------------------------------------------------------------------
    svn:eol-style = native

Added: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cname.h
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cname.h?rev=807635&view=auto
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cname.h (added)
+++ commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cname.h Tue Aug 25 13:57:05 2009
@@ -0,0 +1,139 @@
+/*	$OpenBSD: cname.h,v 1.5 2003/06/02 20:18:36 millert Exp $	*/
+
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cname.h	8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * character-name table
+ *
+ * Maps a symbolic character name to its character code. Several
+ * characters have more than one name. The list ends with a sentinel
+ * entry whose name is NULL.
+ */
+static struct cname {
+	char *name;	/* symbolic name */
+	char code;	/* character the name stands for */
+} cnames[] = {
+	{ "NUL",			'\0' },
+	{ "SOH",			'\001' },
+	{ "STX",			'\002' },
+	{ "ETX",			'\003' },
+	{ "EOT",			'\004' },
+	{ "ENQ",			'\005' },
+	{ "ACK",			'\006' },
+	{ "BEL",			'\007' },
+	{ "alert",			'\007' },
+	{ "BS",				'\010' },
+	{ "backspace",			'\b' },
+	{ "HT",				'\011' },
+	{ "tab",			'\t' },
+	{ "LF",				'\012' },
+	{ "newline",			'\n' },
+	{ "VT",				'\013' },
+	{ "vertical-tab",		'\v' },
+	{ "FF",				'\014' },
+	{ "form-feed",			'\f' },
+	{ "CR",				'\015' },
+	{ "carriage-return",		'\r' },
+	{ "SO",				'\016' },
+	{ "SI",				'\017' },
+	{ "DLE",			'\020' },
+	{ "DC1",			'\021' },
+	{ "DC2",			'\022' },
+	{ "DC3",			'\023' },
+	{ "DC4",			'\024' },
+	{ "NAK",			'\025' },
+	{ "SYN",			'\026' },
+	{ "ETB",			'\027' },
+	{ "CAN",			'\030' },
+	{ "EM",				'\031' },
+	{ "SUB",			'\032' },
+	{ "ESC",			'\033' },
+	{ "IS4",			'\034' },
+	{ "FS",				'\034' },
+	{ "IS3",			'\035' },
+	{ "GS",				'\035' },
+	{ "IS2",			'\036' },
+	{ "RS",				'\036' },
+	{ "IS1",			'\037' },
+	{ "US",				'\037' },
+	{ "space",			' ' },
+	{ "exclamation-mark",		'!' },
+	{ "quotation-mark",		'"' },
+	{ "number-sign",		'#' },
+	{ "dollar-sign",		'$' },
+	{ "percent-sign",		'%' },
+	{ "ampersand",			'&' },
+	{ "apostrophe",			'\'' },
+	{ "left-parenthesis",		'(' },
+	{ "right-parenthesis",		')' },
+	{ "asterisk",			'*' },
+	{ "plus-sign",			'+' },
+	{ "comma",			',' },
+	{ "hyphen",			'-' },
+	{ "hyphen-minus",		'-' },
+	{ "period",			'.' },
+	{ "full-stop",			'.' },
+	{ "slash",			'/' },
+	{ "solidus",			'/' },
+	{ "zero",			'0' },
+	{ "one",			'1' },
+	{ "two",			'2' },
+	{ "three",			'3' },
+	{ "four",			'4' },
+	{ "five",			'5' },
+	{ "six",			'6' },
+	{ "seven",			'7' },
+	{ "eight",			'8' },
+	{ "nine",			'9' },
+	{ "colon",			':' },
+	{ "semicolon",			';' },
+	{ "less-than-sign",		'<' },
+	{ "equals-sign",		'=' },
+	{ "greater-than-sign",		'>' },
+	{ "question-mark",		'?' },
+	{ "commercial-at",		'@' },
+	{ "left-square-bracket",	'[' },
+	{ "backslash",			'\\' },
+	{ "reverse-solidus",		'\\' },
+	{ "right-square-bracket",	']' },
+	{ "circumflex",			'^' },
+	{ "circumflex-accent",		'^' },
+	{ "underscore",			'_' },
+	{ "low-line",			'_' },
+	{ "grave-accent",		'`' },
+	{ "left-brace",			'{' },
+	{ "left-curly-bracket",		'{' },
+	{ "vertical-line",		'|' },
+	{ "right-brace",		'}' },
+	{ "right-curly-bracket",	'}' },
+	{ "tilde",			'~' },
+	{ "DEL",			'\177' },
+	{ NULL,				0 }
+};

Propchange: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/cname.h
------------------------------------------------------------------------------
    svn:eol-style = native

Added: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/engine.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/srclib/regex/engine.c?rev=807635&view=auto
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/srclib/regex/engine.c (added)
+++ commons/sandbox/runtime/trunk/src/main/native/srclib/regex/engine.c Tue Aug 25 13:57:05 2009
@@ -0,0 +1,1021 @@
+/*	$OpenBSD: engine.c,v 1.15 2005/08/05 13:03:00 espie Exp $	*/
+
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)engine.c	8.5 (Berkeley) 3/20/94
+ */
+
+/*
+ * The matching engine and friends.  This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+/*
+ * Give every file-local name in this file an 's' or 'l' prefix, depending
+ * on whether the including file defined SNAMES or LNAMES. Presumably this
+ * lets the file be #included more than once without name clashes --
+ * confirm in regexec.c.
+ */
+#ifdef SNAMES
+#define	matcher	smatcher
+#define	fast	sfast
+#define	slow	sslow
+#define	dissect	sdissect
+#define	backref	sbackref
+#define	step	sstep
+#define	print	sprint
+#define	at	sat
+#define	match	smat
+#define	nope	snope
+#endif
+#ifdef LNAMES
+#define	matcher	lmatcher
+#define	fast	lfast
+#define	slow	lslow
+#define	dissect	ldissect
+#define	backref	lbackref
+#define	step	lstep
+#define	print	lprint
+#define	at	lat
+#define	match	lmat
+#define	nope	lnope
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+	struct re_guts *g;	/* compiled expression being executed */
+	int eflags;		/* execution flags given to matcher() */
+	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
+	char *offp;		/* offsets work from here */
+	char *beginp;		/* start of string -- virtual NUL precedes */
+	char *endp;		/* end of string -- virtual NUL here */
+	char *coldp;		/* can be no match starting before here */
+	char **lastpos;		/* [nplus+1] */
+	STATEVARS;		/* not defined in this file; expected from the including file */
+	states st;		/* current states */
+	states fresh;		/* states for a fresh start */
+	states tmp;		/* temporary */
+	states empty;		/* empty set of states */
+};
+
+/* forward declarations of the functions defined in this file */
+static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int);
+static char *dissect(struct match *, char *, char *, sopno, sopno);
+static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
+static char *fast(struct match *, char *, char *, sopno, sopno);
+static char *slow(struct match *, char *, char *, sopno, sopno);
+static states step(struct re_guts *, sopno, sopno, states, int, states);
+#define MAX_RECURSION	100
+/*
+ * Codes numbered upward from OUT+1 (OUT is defined outside this file).
+ * The names suggest begin/end of line, both at once, neither, and
+ * begin/end of word -- confirm against their use in the engine.
+ */
+#define	BOL	(OUT+1)
+#define	EOL	(BOL+1)
+#define	BOLEOL	(BOL+2)
+#define	NOTHING	(BOL+3)
+#define	BOW	(BOL+4)
+#define	EOW	(BOL+5)
+#define	CODEMAX	(BOL+5)		/* highest code used */
+#define	NONCHAR(c)	((c) > CHAR_MAX)	/* true for any code above CHAR_MAX */
+#define	NNONCHAR	(CODEMAX-CHAR_MAX)	/* number of codes above CHAR_MAX */
+#ifdef REDEBUG
+static void print(struct match *, char *, states, int, FILE *);
+#endif
+#ifdef REDEBUG
+static void at(struct match *, char *, char *, char *, sopno, sopno);
+#endif
+#ifdef REDEBUG
+static char *pchar(int);
+#endif
+
+/* tracing helpers: real code only under REDEBUG, otherwise they expand to nothing */
+#ifdef REDEBUG
+#define	SP(t, s, c)	print(m, t, s, c, stdout)
+#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
+#define	NOTE(str)	{ if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+static int nope = 0;
+#else
+#define	SP(t, s, c)	/* nothing */
+#define	AT(t, p1, p2, s1, s2)	/* nothing */
+#define	NOTE(s)	/* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ *
+ * g       compiled expression internals
+ * string  subject; offsets stored in pmatch[] are relative to it
+ * nmatch  number of pmatch[] slots to fill (forced to 0 when the
+ *         expression was compiled with REG_NOSUB)
+ * pmatch  output array; with REG_STARTEND, pmatch[0] also supplies the
+ *         start and end offsets of the region to search
+ * eflags  execution flags
+ *
+ * Returns 0 on a match, REG_NOMATCH when there is none, REG_INVARG when
+ * the search region ends before it starts, REG_ESPACE when malloc fails.
+ *
+ * fast() decides whether a match exists at all, slow() then locates its
+ * start and end, and dissect() or backref() fills in the subexpressions
+ * when the caller asked for them or back references are involved.
+ */
+static int			/* 0 success, REG_NOMATCH failure */
+matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
+    int eflags)
+{
+	char *endp;
+	int i;
+	struct match mv;
+	struct match *m = &mv;
+	char *dp;
+	const sopno gf = g->firststate+1;	/* +1 for OEND */
+	const sopno gl = g->laststate;
+	char *start;
+	char *stop;
+
+	/* simplify the situation where possible */
+	if (g->cflags&REG_NOSUB)
+		nmatch = 0;
+	if (eflags&REG_STARTEND) {
+		start = string + pmatch[0].rm_so;
+		stop = string + pmatch[0].rm_eo;
+	} else {
+		start = string;
+		stop = start + strlen(start);
+	}
+	if (stop < start)
+		return(REG_INVARG);
+
+	/* prescreening; this does wonders for this rather slow code */
+	if (g->must != NULL) {
+		for (dp = start; dp < stop; dp++)
+			if (*dp == g->must[0] && stop - dp >= g->mlen &&
+				memcmp(dp, g->must, (size_t)g->mlen) == 0)
+				break;
+		if (dp == stop)		/* we didn't find g->must */
+			return(REG_NOMATCH);
+	}
+
+	/* match struct setup */
+	m->g = g;
+	m->eflags = eflags;
+	m->pmatch = NULL;
+	m->lastpos = NULL;
+	m->offp = string;
+	m->beginp = start;
+	m->endp = stop;
+	STATESETUP(m, 4);
+	SETUP(m->st);
+	SETUP(m->fresh);
+	SETUP(m->tmp);
+	SETUP(m->empty);
+	CLEAR(m->empty);
+
+	/* this loop does only one repetition except for backrefs */
+	for (;;) {
+		endp = fast(m, start, stop, gf, gl);
+		if (endp == NULL) {		/* a miss */
+			free(m->pmatch);
+			free(m->lastpos);
+			STATETEARDOWN(m);
+			return(REG_NOMATCH);
+		}
+		if (nmatch == 0 && !g->backrefs)
+			break;		/* no further info needed */
+
+		/* where? */
+		assert(m->coldp != NULL);
+		for (;;) {
+			NOTE("finding start");
+			endp = slow(m, m->coldp, stop, gf, gl);
+			if (endp != NULL)
+				break;
+			assert(m->coldp < m->endp);
+			m->coldp++;
+		}
+		if (nmatch == 1 && !g->backrefs)
+			break;		/* no further info needed */
+
+		/* oh my, he wants the subexpressions... */
+		if (m->pmatch == NULL)
+			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
+							sizeof(regmatch_t));
+		if (m->pmatch == NULL) {
+			STATETEARDOWN(m);
+			return(REG_ESPACE);
+		}
+		for (i = 1; i <= m->g->nsub; i++)
+			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+			NOTE("dissecting");
+			dp = dissect(m, m->coldp, endp, gf, gl);
+		} else {
+			if (g->nplus > 0 && m->lastpos == NULL)
+				m->lastpos = (char **)malloc((g->nplus+1) *
+							sizeof(char *));
+			if (g->nplus > 0 && m->lastpos == NULL) {
+				free(m->pmatch);
+				STATETEARDOWN(m);
+				return(REG_ESPACE);
+			}
+			NOTE("backref dissect");
+			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+		}
+		if (dp != NULL)
+			break;
+
+		/* uh-oh... we couldn't find a subexpression-level match */
+		assert(g->backrefs);	/* must be back references doing it */
+		assert(g->nplus == 0 || m->lastpos != NULL);
+		for (;;) {
+			if (dp != NULL || endp <= m->coldp)
+				break;		/* defeat */
+			NOTE("backoff");
+			endp = slow(m, m->coldp, endp-1, gf, gl);
+			if (endp == NULL)
+				break;		/* defeat */
+			/* try it on a shorter possibility */
+#ifndef NDEBUG
+			for (i = 1; i <= m->g->nsub; i++) {
+				assert(m->pmatch[i].rm_so == -1);
+				assert(m->pmatch[i].rm_eo == -1);
+			}
+#endif
+			NOTE("backoff dissect");
+			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+		}
+		assert(dp == NULL || dp == endp);
+		if (dp != NULL)		/* found a shorter one */
+			break;
+
+		/* despite initial appearances, there is no match here */
+		NOTE("false alarm");
+		/* NOTE(review): breaking out here falls through to the
+		 * "fill in the details" code and return(0) below, with endp
+		 * possibly NULL -- confirm this is the intended result.
+		 */
+		if (m->coldp == stop)
+			break;
+		start = m->coldp + 1;	/* recycle starting later */
+	}
+
+	/* fill in the details if requested */
+	if (nmatch > 0) {
+		pmatch[0].rm_so = m->coldp - m->offp;
+		pmatch[0].rm_eo = endp - m->offp;
+	}
+	if (nmatch > 1) {
+		assert(m->pmatch != NULL);
+		for (i = 1; i < nmatch; i++)
+			if (i <= m->g->nsub)
+				pmatch[i] = m->pmatch[i];
+			else {
+				pmatch[i].rm_so = -1;
+				pmatch[i].rm_eo = -1;
+			}
+	}
+
+	if (m->pmatch != NULL)
+		free((char *)m->pmatch);
+	if (m->lastpos != NULL)
+		free((char *)m->lastpos);
+	STATETEARDOWN(m);
+	return(0);
+}
+
+/*
+ - dissect - figure out what matched what, no back references
+ *
+ * Walks the strip from startst up to (not including) stopst, one subRE per
+ * pass, advancing sp through the text [start, stop) and storing parenthesis
+ * offsets, measured from m->offp, in m->pmatch[].  For OQUEST_, OPLUS_ and
+ * OCH_ it calls slow() to find how much text the construct consumed and then
+ * recurses on the operand.
+ *
+ * There is no failure return: every "cannot happen" path is an assert(), so
+ * the caller must already know that [start, stop) matches this strip range.
+ * Returns sp, which is asserted to equal stop.
+ */
+static char *			/* == stop (success) always */
+dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+	int i;
+	sopno ss;	/* start sop of current subRE */
+	sopno es;	/* end sop of current subRE */
+	char *sp;	/* start of string matched by it */
+	char *stp;	/* string matched by it cannot pass here */
+	char *rest;	/* start of rest of string */
+	char *tail;	/* string unmatched by rest of RE */
+	sopno ssub;	/* start sop of subsubRE */
+	sopno esub;	/* end sop of subsubRE */
+	char *ssp;	/* start of string matched by subsubRE */
+	char *sep;	/* end of string matched by subsubRE */
+	char *oldssp;	/* previous ssp */
+	char *dp;	/* result of recursive dissect(); only read by asserts */
+
+	AT("diss", start, stop, startst, stopst);
+	sp = start;
+	for (ss = startst; ss < stopst; ss = es) {	/* one subRE per pass */
+		/* identify end of subRE */
+		es = ss;
+		switch (OP(m->g->strip[es])) {
+		case OPLUS_:
+		case OQUEST_:
+			es += OPND(m->g->strip[es]);	/* operand is the distance to skip */
+			break;
+		case OCH_:
+			while (OP(m->g->strip[es]) != O_CH)	/* hop until the O_CH */
+				es += OPND(m->g->strip[es]);
+			break;
+		}
+		es++;	/* next pass starts here (ss = es) */
+
+		/* figure out what it matched */
+		switch (OP(m->g->strip[ss])) {
+		case OEND:
+			assert(nope);
+			break;
+		case OCHAR:
+			sp++;	/* consumes exactly one character */
+			break;
+		case OBOL:
+		case OEOL:
+		case OBOW:
+		case OEOW:
+			break;	/* sp is not advanced for these */
+		case OANY:
+		case OANYOF:
+			sp++;	/* consumes exactly one character */
+			break;
+		case OBACK_:
+		case O_BACK:
+			assert(nope);
+			break;
+		/* cases where length of match is hard to find */
+		case OQUEST_:
+			stp = stop;
+			for (;;) {
+				/* how long could this one be? */
+				rest = slow(m, sp, stp, ss, es);
+				assert(rest != NULL);	/* it did match */
+				/* could the rest match the rest? */
+				tail = slow(m, rest, stop, es, stopst);
+				if (tail == stop)
+					break;		/* yes! */
+				/* no -- try a shorter match for this one */
+				stp = rest - 1;
+				assert(stp >= sp);	/* it did work */
+			}
+			ssub = ss + 1;	/* strip range strictly inside ss..es-1 */
+			esub = es - 1;
+			/* did innards match? */
+			if (slow(m, sp, rest, ssub, esub) != NULL) {
+				dp = dissect(m, sp, rest, ssub, esub);
+				assert(dp == rest);
+			} else		/* no */
+				assert(sp == rest);	/* so this subRE consumed nothing */
+			sp = rest;	/* continue after the text this subRE consumed */
+			break;
+		case OPLUS_:
+			stp = stop;
+			for (;;) {
+				/* how long could this one be? */
+				rest = slow(m, sp, stp, ss, es);
+				assert(rest != NULL);	/* it did match */
+				/* could the rest match the rest? */
+				tail = slow(m, rest, stop, es, stopst);
+				if (tail == stop)
+					break;		/* yes! */
+				/* no -- try a shorter match for this one */
+				stp = rest - 1;
+				assert(stp >= sp);	/* it did work */
+			}
+			ssub = ss + 1;	/* strip range strictly inside ss..es-1 */
+			esub = es - 1;
+			ssp = sp;
+			oldssp = ssp;
+			for (;;) {	/* find last match of innards */
+				sep = slow(m, ssp, rest, ssub, esub);
+				if (sep == NULL || sep == ssp)
+					break;	/* failed or matched null */
+				oldssp = ssp;	/* on to next try */
+				ssp = sep;
+			}
+			if (sep == NULL) {
+				/* last successful match */
+				sep = ssp;
+				ssp = oldssp;
+			}
+			assert(sep == rest);	/* must exhaust substring */
+			assert(slow(m, ssp, sep, ssub, esub) == rest);
+			dp = dissect(m, ssp, sep, ssub, esub);	/* only the final iteration is dissected */
+			assert(dp == sep);
+			sp = rest;	/* continue after the text this subRE consumed */
+			break;
+		case OCH_:
+			stp = stop;
+			for (;;) {
+				/* how long could this one be? */
+				rest = slow(m, sp, stp, ss, es);
+				assert(rest != NULL);	/* it did match */
+				/* could the rest match the rest? */
+				tail = slow(m, rest, stop, es, stopst);
+				if (tail == stop)
+					break;		/* yes! */
+				/* no -- try a shorter match for this one */
+				stp = rest - 1;
+				assert(stp >= sp);	/* it did work */
+			}
+			ssub = ss + 1;
+			esub = ss + OPND(m->g->strip[ss]) - 1;	/* end of first branch; asserted to be an OOR1 */
+			assert(OP(m->g->strip[esub]) == OOR1);
+			for (;;) {	/* find first matching branch */
+				if (slow(m, sp, rest, ssub, esub) == rest)
+					break;	/* it matched all of it */
+				/* that one missed, try next one */
+				assert(OP(m->g->strip[esub]) == OOR1);
+				esub++;
+				assert(OP(m->g->strip[esub]) == OOR2);
+				ssub = esub + 1;
+				esub += OPND(m->g->strip[esub]);
+				if (OP(m->g->strip[esub]) == OOR2)
+					esub--;	/* back up from the OOR2 to the sop before it */
+				else
+					assert(OP(m->g->strip[esub]) == O_CH);
+			}
+			dp = dissect(m, sp, rest, ssub, esub);
+			assert(dp == rest);
+			sp = rest;	/* continue after the text this subRE consumed */
+			break;
+		case O_PLUS:
+		case O_QUEST:
+		case OOR1:
+		case OOR2:
+		case O_CH:
+			assert(nope);
+			break;
+		case OLPAREN:
+			i = OPND(m->g->strip[ss]);	/* operand is the pmatch[] index */
+			assert(0 < i && i <= m->g->nsub);
+			m->pmatch[i].rm_so = sp - m->offp;
+			break;
+		case ORPAREN:
+			i = OPND(m->g->strip[ss]);	/* operand is the pmatch[] index */
+			assert(0 < i && i <= m->g->nsub);
+			m->pmatch[i].rm_eo = sp - m->offp;
+			break;
+		default:		/* uh oh */
+			assert(nope);
+			break;
+		}
+	}
+
+	assert(sp == stop);
+	return(sp);
+}
+
+/*
+ - backref - figure out what matched what, figuring in back references
+ *
+ * Tries to match the strip range [startst, stopst) against exactly the text
+ * [start, stop), storing parenthesis offsets (measured from m->offp) in
+ * m->pmatch[] as it goes.  Operators that leave no choice are handled in a
+ * loop; the first operator that does require a choice is handled by
+ * recursing on each alternative in turn.
+ *
+ * lev is the current OPLUS_ nesting level and indexes m->lastpos[].
+ * rec counts zero-length back-reference expansions; once it exceeds
+ * MAX_RECURSION the attempt is abandoned.
+ *
+ * Returns stop on success, NULL on failure.  A pmatch[] offset written for
+ * an OLPAREN/ORPAREN is restored if the remainder then fails to match.
+ */
+static char *			/* == stop (success) or NULL (failure) */
+backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
+    sopno lev, int rec)			/* PLUS nesting level */
+{
+	int i;
+	sopno ss;	/* start sop of current subRE */
+	char *sp;	/* start of string matched by it */
+	sopno ssub;	/* start sop of subsubRE */
+	sopno esub;	/* end sop of subsubRE */
+	char *ssp;	/* start of string matched by subsubRE */
+	char *dp;
+	size_t len;
+	int hard;
+	sop s;
+	regoff_t offsave;
+	cset *cs;
+
+	AT("back", start, stop, startst, stopst);
+	sp = start;
+
+	/* get as far as we can with easy stuff */
+	hard = 0;
+	for (ss = startst; !hard && ss < stopst; ss++)
+		switch (OP(s = m->g->strip[ss])) {
+		case OCHAR:
+			if (sp == stop || *sp++ != (char)OPND(s))
+				return(NULL);
+			break;
+		case OANY:
+			if (sp == stop)
+				return(NULL);
+			sp++;
+			break;
+		case OANYOF:
+			cs = &m->g->sets[OPND(s)];
+			if (sp == stop || !CHIN(cs, *sp++))
+				return(NULL);
+			break;
+		case OBOL:
+			/*
+			 * Only look at the previous character when sp is
+			 * past m->beginp.  Without that guard, sp ==
+			 * m->beginp with REG_NOTBOL set fell through to
+			 * *(sp-1) and read before the subject text.  This
+			 * also agrees with fast()/slow(), which treat
+			 * m->beginp as having no character before it.
+			 */
+			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+					(sp > m->beginp && sp < m->endp &&
+						*(sp-1) == '\n' &&
+						(m->g->cflags&REG_NEWLINE)) )
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case OEOL:
+			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+					(sp < m->endp && *sp == '\n' &&
+						(m->g->cflags&REG_NEWLINE)) )
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case OBOW:
+			/* same look-behind guard as OBOL, for the same reason */
+			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+					(sp > m->beginp && sp < m->endp &&
+						*(sp-1) == '\n' &&
+						(m->g->cflags&REG_NEWLINE)) ||
+					(sp > m->beginp &&
+							!ISWORD(*(sp-1))) ) &&
+					(sp < m->endp && ISWORD(*sp)) )
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case OEOW:
+			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+					(sp < m->endp && *sp == '\n' &&
+						(m->g->cflags&REG_NEWLINE)) ||
+					(sp < m->endp && !ISWORD(*sp)) ) &&
+					(sp > m->beginp && ISWORD(*(sp-1))) )
+				{ /* yes */ }
+			else
+				return(NULL);
+			break;
+		case O_QUEST:
+			break;
+		case OOR1:	/* matches null but needs to skip */
+			ss++;
+			s = m->g->strip[ss];
+			do {
+				assert(OP(s) == OOR2);
+				ss += OPND(s);
+			} while (OP(s = m->g->strip[ss]) != O_CH);
+			/* note that the ss++ gets us past the O_CH */
+			break;
+		default:	/* have to make a choice */
+			hard = 1;
+			break;
+		}
+	if (!hard) {		/* that was it! */
+		if (sp != stop)
+			return(NULL);	/* strip exhausted but text is left over */
+		return(sp);
+	}
+	ss--;			/* adjust for the for's final increment */
+
+	/* the hard stuff */
+	AT("hard", sp, stop, ss, stopst);
+	s = m->g->strip[ss];
+	switch (OP(s)) {
+	case OBACK_:		/* the vilest depths */
+		i = OPND(s);
+		assert(0 < i && i <= m->g->nsub);
+		if (m->pmatch[i].rm_eo == -1)
+			return(NULL);	/* referenced group has not closed yet */
+		assert(m->pmatch[i].rm_so != -1);
+		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+		if (len == 0 && rec++ > MAX_RECURSION)
+			return(NULL);	/* too many empty expansions */
+		/* cast keeps the comparison unsigned on both sides */
+		assert((size_t)(stop - m->beginp) >= len);
+		if (sp > stop - len)
+			return(NULL);	/* not enough left to match */
+		ssp = m->offp + m->pmatch[i].rm_so;
+		if (memcmp(sp, ssp, len) != 0)
+			return(NULL);
+		/* skip forward to the O_BACK that closes this reference */
+		while (m->g->strip[ss] != SOP(O_BACK, i))
+			ss++;
+		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+		break;
+	case OQUEST_:		/* to null or not */
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+		if (dp != NULL)
+			return(dp);	/* not */
+		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+		break;
+	case OPLUS_:
+		assert(m->lastpos != NULL);
+		assert(lev+1 <= m->g->nplus);
+		m->lastpos[lev+1] = sp;	/* where this loop's first pass began */
+		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+		break;
+	case O_PLUS:
+		if (sp == m->lastpos[lev])	/* last pass matched null */
+			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+		/* try another pass */
+		m->lastpos[lev] = sp;
+		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+		if (dp == NULL)
+			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+		else
+			return(dp);
+		break;
+	case OCH_:		/* find the right one, if any */
+		ssub = ss + 1;
+		esub = ss + OPND(s) - 1;
+		assert(OP(m->g->strip[esub]) == OOR1);
+		for (;;) {	/* find first matching branch */
+			dp = backref(m, sp, stop, ssub, esub, lev, rec);
+			if (dp != NULL)
+				return(dp);
+			/* that one missed, try next one */
+			if (OP(m->g->strip[esub]) == O_CH)
+				return(NULL);	/* there is none */
+			esub++;
+			assert(OP(m->g->strip[esub]) == OOR2);
+			ssub = esub + 1;
+			esub += OPND(m->g->strip[esub]);
+			if (OP(m->g->strip[esub]) == OOR2)
+				esub--;
+			else
+				assert(OP(m->g->strip[esub]) == O_CH);
+		}
+		break;
+	case OLPAREN:		/* must undo assignment if rest fails */
+		i = OPND(s);
+		assert(0 < i && i <= m->g->nsub);
+		offsave = m->pmatch[i].rm_so;
+		m->pmatch[i].rm_so = sp - m->offp;
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+		if (dp != NULL)
+			return(dp);
+		m->pmatch[i].rm_so = offsave;
+		return(NULL);
+		break;
+	case ORPAREN:		/* must undo assignment if rest fails */
+		i = OPND(s);
+		assert(0 < i && i <= m->g->nsub);
+		offsave = m->pmatch[i].rm_eo;
+		m->pmatch[i].rm_eo = sp - m->offp;
+		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+		if (dp != NULL)
+			return(dp);
+		m->pmatch[i].rm_eo = offsave;
+		return(NULL);
+		break;
+	default:		/* uh oh */
+		assert(nope);
+		break;
+	}
+
+	/* "can't happen" */
+	assert(nope);
+	/* NOTREACHED */
+	return(NULL);
+}
+
+/*
+ - fast - step through the string at top speed
+ *
+ * Runs the state set forward from start one character at a time, stopping
+ * as soon as stopst is in the set or p reaches stop.  Along the way it
+ * records in m->coldp the last position at which the state set was equal
+ * to the initial ("fresh") set.
+ *
+ * Returns p+1 if stopst was reached, NULL otherwise.  In the part of the
+ * caller visible alongside this function the result is only compared
+ * against NULL before being overwritten by slow().
+ */
+static char *			/* where tentative match ended, or NULL */
+fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+	states st = m->st;
+	states fresh = m->fresh;
+	states tmp = m->tmp;
+	char *p = start;
+	int c = (start == m->beginp) ? OUT : *(start-1);	/* char before start; OUT at m->beginp */
+	int lastc;	/* previous c */
+	int flagch;
+	int i;
+	char *coldp;	/* last p after which no match was underway */
+
+	CLEAR(st);
+	SET1(st, startst);
+	st = step(m->g, startst, stopst, st, NOTHING, st);	/* follow transitions that need no input */
+	ASSIGN(fresh, st);	/* remember the initial state set */
+	SP("start", st, *p);
+	coldp = NULL;
+	for (;;) {
+		/* next character */
+		lastc = c;
+		c = (p == m->endp) ? OUT : *p;
+		if (EQ(st, fresh))
+			coldp = p;	/* only the initial states are live here */
+
+		/* is there an EOL and/or BOL between lastc and c? */
+		flagch = '\0';
+		i = 0;
+		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+			flagch = BOL;
+			i = m->g->nbol;
+		}
+		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+			flagch = (flagch == BOL) ? BOLEOL : EOL;
+			i += m->g->neol;
+		}
+		if (i != 0) {
+			for (; i > 0; i--)	/* i is nbol and/or neol, set just above */
+				st = step(m->g, startst, stopst, st, flagch, st);
+			SP("boleol", st, c);
+		}
+
+		/* how about a word boundary? */
+		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+					(c != OUT && ISWORD(c)) ) {
+			flagch = BOW;
+		}
+		if ( (lastc != OUT && ISWORD(lastc)) &&
+				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+			flagch = EOW;
+		}
+		if (flagch == BOW || flagch == EOW) {
+			st = step(m->g, startst, stopst, st, flagch, st);
+			SP("boweow", st, c);
+		}
+
+		/* are we done? */
+		if (ISSET(st, stopst) || p == stop)
+			break;		/* NOTE BREAK OUT */
+
+		/* no, we must deal with this character */
+		ASSIGN(tmp, st);
+		ASSIGN(st, fresh);	/* seed with the initial set so a match may also begin here */
+		assert(c != OUT);
+		st = step(m->g, startst, stopst, tmp, c, st);
+		SP("aft", st, c);
+		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+		p++;
+	}
+
+	assert(coldp != NULL);
+	m->coldp = coldp;	/* the visible caller starts its slow() search here */
+	if (ISSET(st, stopst))
+		return(p+1);
+	else
+		return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ *
+ * Runs the state set forward from start, which is the only place a match
+ * may begin, until the set becomes empty or p reaches stop.  Each time
+ * stopst is in the set the current position is remembered, so the value
+ * returned is the last (furthest) such position.
+ *
+ * Returns that position, or NULL if stopst was never reached.
+ */
+static char *			/* where it ended */
+slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+	states st = m->st;
+	states empty = m->empty;
+	states tmp = m->tmp;
+	char *p = start;
+	int c = (start == m->beginp) ? OUT : *(start-1);	/* char before start; OUT at m->beginp */
+	int lastc;	/* previous c */
+	int flagch;
+	int i;
+	char *matchp;	/* last p at which a match ended */
+
+	AT("slow", start, stop, startst, stopst);
+	CLEAR(st);
+	SET1(st, startst);
+	SP("sstart", st, *p);
+	st = step(m->g, startst, stopst, st, NOTHING, st);	/* follow transitions that need no input */
+	matchp = NULL;
+	for (;;) {
+		/* next character */
+		lastc = c;
+		c = (p == m->endp) ? OUT : *p;
+
+		/* is there an EOL and/or BOL between lastc and c? */
+		flagch = '\0';
+		i = 0;
+		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+			flagch = BOL;
+			i = m->g->nbol;
+		}
+		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+			flagch = (flagch == BOL) ? BOLEOL : EOL;
+			i += m->g->neol;
+		}
+		if (i != 0) {
+			for (; i > 0; i--)	/* i is nbol and/or neol, set just above */
+				st = step(m->g, startst, stopst, st, flagch, st);
+			SP("sboleol", st, c);
+		}
+
+		/* how about a word boundary? */
+		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+					(c != OUT && ISWORD(c)) ) {
+			flagch = BOW;
+		}
+		if ( (lastc != OUT && ISWORD(lastc)) &&
+				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+			flagch = EOW;
+		}
+		if (flagch == BOW || flagch == EOW) {
+			st = step(m->g, startst, stopst, st, flagch, st);
+			SP("sboweow", st, c);
+		}
+
+		/* are we done? */
+		if (ISSET(st, stopst))
+			matchp = p;	/* keep going: a later position may also match */
+		if (EQ(st, empty) || p == stop)
+			break;		/* NOTE BREAK OUT */
+
+		/* no, we must deal with this character */
+		ASSIGN(tmp, st);
+		ASSIGN(st, empty);	/* unlike fast(), no new match may begin here */
+		assert(c != OUT);
+		st = step(m->g, startst, stopst, tmp, c, st);
+		SP("saft", st, c);
+		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+		p++;
+	}
+
+	return(matchp);
+}
+
+
+/*
+ - step - map set of states reachable before char to set reachable after
+ *
+ * Scans the strip from start to stop, updating aft one sop at a time.
+ * Operators that test ch (OCHAR, OANY, OANYOF, OBOL, OEOL, OBOW, OEOW)
+ * carry a mark from bef forward into aft when ch satisfies them; all other
+ * operators propagate marks within aft regardless of ch.  When an O_PLUS
+ * newly marks the start of its loop, the scan backs up so the loop body is
+ * examined again.
+ *
+ * The callers visible alongside this function pass either a character or
+ * one of BOL, EOL, BOLEOL, BOW, EOW, NOTHING as ch, and sometimes pass the
+ * same set as both bef and aft.
+ *
+ * Returns aft.
+ */
+static states
+step(struct re_guts *g,
+    sopno start,		/* start state within strip */
+    sopno stop,			/* state after stop state within strip */
+    states bef,			/* states reachable before */
+    int ch,			/* character or NONCHAR code */
+    states aft)			/* states already known reachable after */
+{
+	cset *cs;
+	sop s;
+	sopno pc;
+	onestate here;		/* note, macros know this name */
+	sopno look;
+	int i;
+
+	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
+		s = g->strip[pc];
+		switch (OP(s)) {
+		case OEND:
+			assert(pc == stop-1);
+			break;
+		case OCHAR:
+			/* only characters can match */
+			assert(!NONCHAR(ch) || ch != (char)OPND(s));
+			if (ch == (char)OPND(s))
+				FWD(aft, bef, 1);
+			break;
+		case OBOL:
+			if (ch == BOL || ch == BOLEOL)
+				FWD(aft, bef, 1);
+			break;
+		case OEOL:
+			if (ch == EOL || ch == BOLEOL)
+				FWD(aft, bef, 1);
+			break;
+		case OBOW:
+			if (ch == BOW)
+				FWD(aft, bef, 1);
+			break;
+		case OEOW:
+			if (ch == EOW)
+				FWD(aft, bef, 1);
+			break;
+		case OANY:
+			if (!NONCHAR(ch))
+				FWD(aft, bef, 1);
+			break;
+		case OANYOF:
+			cs = &g->sets[OPND(s)];
+			if (!NONCHAR(ch) && CHIN(cs, ch))
+				FWD(aft, bef, 1);
+			break;
+		case OBACK_:		/* ignored here */
+		case O_BACK:
+			FWD(aft, aft, 1);
+			break;
+		case OPLUS_:		/* forward, this is just an empty */
+			FWD(aft, aft, 1);
+			break;
+		case O_PLUS:		/* both forward and back */
+			FWD(aft, aft, 1);
+			i = ISSETBACK(aft, OPND(s));	/* was the loop start already marked? */
+			BACK(aft, aft, OPND(s));
+			if (!i && ISSETBACK(aft, OPND(s))) {
+				/* oho, must reconsider loop body */
+				pc -= OPND(s) + 1;	/* the for's pc++ then resumes at pc - OPND(s) */
+				INIT(here, pc);
+			}
+			break;
+		case OQUEST_:		/* two branches, both forward */
+			FWD(aft, aft, 1);
+			FWD(aft, aft, OPND(s));
+			break;
+		case O_QUEST:		/* just an empty */
+			FWD(aft, aft, 1);
+			break;
+		case OLPAREN:		/* not significant here */
+		case ORPAREN:
+			FWD(aft, aft, 1);
+			break;
+		case OCH_:		/* mark the first two branches */
+			FWD(aft, aft, 1);
+			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+			FWD(aft, aft, OPND(s));
+			break;
+		case OOR1:		/* done a branch, find the O_CH */
+			if (ISSTATEIN(aft, here)) {
+				for (look = 1;
+						OP(s = g->strip[pc+look]) != O_CH;
+						look += OPND(s))
+					assert(OP(s) == OOR2);
+				FWD(aft, aft, look);	/* look is now the distance to the O_CH */
+			}
+			break;
+		case OOR2:		/* propagate OCH_'s marking */
+			FWD(aft, aft, 1);
+			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
+				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+				FWD(aft, aft, OPND(s));
+			}
+			break;
+		case O_CH:		/* just empty */
+			FWD(aft, aft, 1);
+			break;
+		default:		/* ooooops... */
+			assert(nope);
+			break;
+		}
+	}
+
+	return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ *
+ * Writes one line to d: the caption, then ch (unless it is '\0'), then the
+ * number of every state set in st.  Does nothing unless REG_TRACE is set
+ * in m->eflags.
+ */
+static void
+print(struct match *m, char *caption, states st, int ch, FILE *d)
+{
+	struct re_guts *g = m->g;
+	const char *sep = "\t";	/* tab before the first state, ", " afterwards */
+	int i;
+
+	if (m->eflags&REG_TRACE) {
+		(void)fprintf(d, "%s", caption);
+		if (ch != '\0')
+			(void)fprintf(d, " %s", pchar(ch));
+		for (i = 0; i < g->nstates; i++) {
+			if (!ISSET(st, i))
+				continue;
+			(void)fprintf(d, "%s%d", sep, i);
+			sep = ", ";
+		}
+		(void)fprintf(d, "\n");
+	}
+}
+
+/*
+ - at - print current situation
+ *
+ * Writes the title, the characters at start and stop, and the strip range
+ * startst-stopst to stdout.  Does nothing unless REG_TRACE is set in
+ * m->eflags.
+ */
+static void
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
+    sopno stopst)
+{
+	long first = (long)startst;
+	long last = (long)stopst;
+
+	if (m->eflags&REG_TRACE) {
+		/* pchar() hands back a static buffer, so one call per printf */
+		(void)printf("%s %s-", title, pchar(*start));
+		(void)printf("%s ", pchar(*stop));
+		(void)printf("%ld-%ld\n", first, last);
+	}
+}
+
+#ifndef PCHARDONE
+#define	PCHARDONE	/* never again */
+/*
+ - pchar - make a character printable
+ *
+ * Is this identical to regchar() over in debug.c?  Well, yes.  But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient.  It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ *
+ * ch may be a plain char value (negative where char is signed) or a code
+ * above the unsigned char range.  Printable characters and ' ' are
+ * returned as themselves; everything else as a backslash followed by the
+ * value in octal.
+ *
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+static char *			/* -> representation */
+pchar(int ch)
+{
+	static char pbuf[10];
+	unsigned int code;
+
+	/* fold a negative (signed char) value onto its unsigned char value */
+	code = (ch < 0) ? (unsigned int)(unsigned char)ch : (unsigned int)ch;
+
+	/*
+	 * isprint() is only defined for unsigned char values and EOF, so it
+	 * is consulted only when code fits in an unsigned char.
+	 */
+	if (code == (unsigned char)code &&
+			(isprint((int)code) || code == ' '))
+		(void)snprintf(pbuf, sizeof pbuf, "%c", (int)code);
+	else
+		(void)snprintf(pbuf, sizeof pbuf, "\\%o", code);
+	return(pbuf);
+}
+#endif
+#endif
+
+#undef	matcher
+#undef	fast
+#undef	slow
+#undef	dissect
+#undef	backref
+#undef	step
+#undef	print
+#undef	at
+#undef	match
+#undef	nope

Propchange: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/engine.c
------------------------------------------------------------------------------
    svn:eol-style = native

Added: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/re_comp.h
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/srclib/regex/re_comp.h?rev=807635&view=auto
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/srclib/regex/re_comp.h (added)
+++ commons/sandbox/runtime/trunk/src/main/native/srclib/regex/re_comp.h Tue Aug 25 13:57:05 2009
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 1996 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by J.T. Conklin.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RE_COMP_H_
+#define _RE_COMP_H_
+
+#ifdef __cplusplus
+extern "C" {			/* C linkage when included from C++ */
+#endif
+char   *re_comp(const char *);	/* prototype only; not defined in this header */
+int	re_exec(const char *);	/* prototype only; not defined in this header */
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RE_COMP_H_ */
+

Propchange: commons/sandbox/runtime/trunk/src/main/native/srclib/regex/re_comp.h
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message