apr-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From david reid <da...@jetnet.co.uk>
Subject regex
Date Sat, 12 Aug 2006 20:13:41 GMT
After a discussion on IRC, I've started looking at adding pcre support
to apr-util. The patch to start this off is below...

Not perfect and not quite complete, but I said I'd post early on this
and let others look.

david


Property changes on: regex
___________________________________________________________________
Name: svn:ignore
   + .libs



Index: regex/apr_regex.c
===================================================================
--- regex/apr_regex.c	(revision 0)
+++ regex/apr_regex.c	(revision 0)
@@ -0,0 +1,252 @@
+/**************************************************
+ *      Perl-Compatible Regular Expressions       *
+ **************************************************/
+
+/*
+This is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language. See
+the file Tech.Notes for some information on the internals.
+
+This module is a wrapper that provides a POSIX API to the underlying PCRE
+functions.
+
+Written by: Philip Hazel <ph10@cam.ac.uk>
+
+           Copyright (c) 1997-2004 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#include "apr_strings.h"
+#include "apr_regex.h"
+
+#include "apu_config.h"
+
+#if APU_HAVE_PCRE
+
+#include "pcre.h"
+
+#define APR_WANT_STRFUNC
+#include "apr_want.h"
+
+/* Table of error strings indexed by regex error code; must be kept in
+ * sync with the APR_REG_* definitions in apr_regex.h. */
+
+static const char *const pstring[] = {
+    "",                                /* Dummy for value 0 */
+    "internal error",                  /* APR_REG_ASSERT */
+    "failed to get memory",            /* APR_REG_ESPACE */
+    "bad argument",                    /* APR_REG_INVARG */
+    "match failed"                     /* APR_REG_NOMATCH */
+};
+
+/**
+ * Build a human readable message for a regex error code.
+ *
+ * @param errcode     error code, used directly as an index into pstring[];
+ *                    any value outside the table yields
+ *                    "unknown error code"
+ * @param preg        optional regex; when it is non-NULL and its
+ *                    re_erroffset is not -1, " at offset N" is appended
+ *                    if the buffer is large enough
+ * @param errbuf      buffer receiving the NUL terminated message; may be
+ *                    NULL, in which case nothing is written
+ * @param errbuf_size size of errbuf in bytes
+ * @return the number of bytes reserved for the complete message: the
+ *         message plus its NUL, plus the offset text and a 6 character
+ *         wide offset field when an offset is available
+ */
+APR_DECLARE(apr_size_t) apr_regerror(int errcode, const apr_regex_t *preg,
+                                    char *errbuf, apr_size_t errbuf_size)
+{
+    const int nstrings = (int)(sizeof(pstring) / sizeof(pstring[0]));
+    const char *addmessage = " at offset ";
+    const char *message;
+    apr_size_t length, addlength;
+
+    /* Negative codes have to be rejected as well as too large ones,
+     * otherwise pstring[] is read out of bounds. */
+    message = (errcode < 0 || errcode >= nstrings) ?
+              "unknown error code" : pstring[errcode];
+    length = strlen(message) + 1;
+
+    addlength = (preg != NULL && (int)preg->re_erroffset != -1) ?
+                strlen(addmessage) + 6 : 0;
+
+    if (errbuf != NULL && errbuf_size > 0) {
+        if (addlength > 0 && errbuf_size >= length + addlength) {
+            /* Bound the write by the caller supplied size; sizeof errbuf
+             * would only be the size of the pointer. */
+            apr_snprintf(errbuf, errbuf_size, "%s%s%-6d", message,
+                         addmessage, (int)preg->re_erroffset);
+        }
+        else {
+            /* No room (or no offset): copy the bare message, truncated
+             * if necessary, and terminate it explicitly. */
+            strncpy(errbuf, message, errbuf_size - 1);
+            errbuf[errbuf_size - 1] = 0;
+        }
+    }
+
+    return length + addlength;
+}
+
+/*************************************************
+*           Free store held by a regex           *
+*************************************************/
+
+/**
+ * Release the compiled PCRE pattern held by a regex.
+ *
+ * Only the pattern stored in re_pcre is released, through pcre_free; no
+ * other member of the structure is touched.
+ *
+ * @param preg regex whose compiled pattern should be freed; a NULL
+ *             pointer or a regex whose pattern was already freed is
+ *             ignored
+ */
+APR_DECLARE(void) apr_regfree(apr_regex_t *preg)
+{
+    if (preg == NULL || preg->re_pcre == NULL)
+        return;
+
+    (pcre_free)(preg->re_pcre);
+    /* Forget the pointer so that a second call cannot free it twice */
+    preg->re_pcre = NULL;
+}
+
+/*************************************************
+*            Compile a regular expression        *
+*************************************************/
+/*
+Arguments:
+  preg        receives a pointer to the newly allocated compiled
+              expression; set to NULL when compilation fails
+  pattern     the pattern to compile
+  cflags      compilation flags; APR_REG_ICASE and APR_REG_NEWLINE are
+              translated to PCRE_CASELESS and PCRE_MULTILINE
+  p           pool the apr_regex_t structure is allocated from; it is
+              also recorded in the structure for later use
+
+Returns:      APR_SUCCESS on success
+              APR_ENOPOOL if no pool was given
+              APR_EINVAL if preg or pattern is NULL
+              APR_REG_INVARG if PCRE rejects the pattern
+              APR_REG_ASSERT if the capture count cannot be queried
+              APR_ENOMEM if the structure cannot be allocated
+
+The compiled pattern is obtained from pcre_compile, not from the pool; the
+only place in this file that releases it on behalf of the caller is
+apr_regfree.
+*/
+
+APR_DECLARE(apr_status_t) apr_regcomp(apr_regex_t **preg,
+                                      const char *pattern,
+                                      int cflags, apr_pool_t *p)
+{
+    const char *errorptr = NULL;
+    int erroffset = -1;
+    int capcount = 0;
+    int options = 0;
+    pcre *re = NULL;
+    apr_regex_t *are = NULL;
+
+    if (!p)
+        return APR_ENOPOOL;
+    if (!preg || !pattern)
+        return APR_EINVAL;
+
+    *preg = NULL;
+
+    if ((cflags & APR_REG_ICASE) != 0)
+        options |= PCRE_CASELESS;
+    if ((cflags & APR_REG_NEWLINE) != 0)
+        options |= PCRE_MULTILINE;
+
+    re = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
+    if (re == NULL)
+        return APR_REG_INVARG;
+
+    /* pcre_fullinfo replaces the obsolete pcre_info for reading the
+     * number of capturing subpatterns. */
+    if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &capcount) != 0) {
+        (pcre_free)(re);
+        return APR_REG_ASSERT;
+    }
+
+    are = (apr_regex_t *)apr_pcalloc(p, sizeof(*are));
+    if (!are) {
+        /* Do not leak the compiled pattern when we cannot return it */
+        (pcre_free)(re);
+        return APR_ENOMEM;
+    }
+
+    are->re_pcre = re;
+    /* Compilation succeeded, so there is no error offset to report; the
+     * value left in erroffset is only meaningful when pcre_compile
+     * fails. */
+    are->re_erroffset = (apr_size_t)(-1);
+    are->pool = p;
+    are->re_nsub = capcount;
+    *preg = are;
+
+    return APR_SUCCESS;
+}
+
+/*************************************************
+*              Match a regular expression        *
+*************************************************/
+
+/* PCRE requires 3 ints of working space for each captured substring, so
+the match array handed back to the caller cannot double as the PCRE output
+vector. Both the working vector and the array returned through pmatch are
+allocated from the pool recorded in preg; nothing is released here.
+
+Arguments:
+  pmatch      receives an array of nmatch entries on success and NULL
+              otherwise; slots without a capture have both offsets -1
+  preg        expression to match against, as filled in by apr_regcomp
+  string      NUL terminated subject string
+  nmatch      number of match slots wanted
+  eflags      APR_REG_NOTBOL and APR_REG_NOTEOL are translated to
+              PCRE_NOTBOL and PCRE_NOTEOL
+
+Returns:      APR_SUCCESS on a match
+              APR_REG_NOMATCH if nothing matched
+              APR_EINVAL if pmatch, preg or string is NULL
+              APR_ENOMEM if the working storage cannot be allocated
+              APR_REG_INVARG, APR_REG_ESPACE or APR_REG_ASSERT for the
+              remaining PCRE errors
+
+Matching is always run with PCRE_NOTEMPTY set. */
+
+APR_DECLARE(apr_status_t) apr_regexec(apr_regmatch_t **pmatch,
+                                      const apr_regex_t *preg,
+                                      const char *string,
+                                      apr_size_t nmatch,
+                                      int eflags)
+{
+    int rc;
+    int options = PCRE_NOTEMPTY;
+    int *ovector = NULL;
+    apr_regmatch_t *matches = NULL;
+
+    if (!pmatch)
+        return APR_EINVAL;
+    *pmatch = NULL;
+    if (!preg || !string)
+        return APR_EINVAL;
+
+    if ((eflags & APR_REG_NOTBOL) != 0)
+        options |= PCRE_NOTBOL;
+    if ((eflags & APR_REG_NOTEOL) != 0)
+        options |= PCRE_NOTEOL;
+
+    /* Only has meaning after compile */
+    ((apr_regex_t *)preg)->re_erroffset = (apr_size_t)(-1);
+
+    ovector = (int *)apr_palloc(preg->pool, sizeof(int) * nmatch * 3);
+    if (!ovector)
+        return APR_ENOMEM;
+    /* Size the array by its element type; sizeof(*pmatch) would only be
+     * the size of a pointer and under-allocate the array. */
+    matches = (apr_regmatch_t *)apr_palloc(preg->pool,
+                                           sizeof(*matches) * nmatch);
+    if (!matches)
+        return APR_ENOMEM;
+
+    rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string,
+                   (int)strlen(string),
+                   0, options, ovector, (int)(nmatch * 3));
+
+    if (rc >= 0) {
+        apr_size_t filled, i;
+
+        /* A return of 0 means the vector was too small to hold every
+         * capture, i.e. all nmatch slots were filled in. */
+        filled = (rc == 0) ? nmatch : (apr_size_t)rc;
+        if (filled > nmatch)
+            filled = nmatch;
+
+        /* Fill in returned slots with the data */
+        for (i = 0; i < filled; i++) {
+            matches[i].rm_so = ovector[i * 2];
+            matches[i].rm_eo = ovector[i * 2 + 1];
+        }
+
+        /* Fill in remaining slots with -1 for both settings */
+        for (; i < nmatch; i++)
+            matches[i].rm_so = matches[i].rm_eo = -1;
+
+        *pmatch = matches;
+        return APR_SUCCESS;
+    }
+
+    switch (rc) {
+        case PCRE_ERROR_NOMATCH:
+            return APR_REG_NOMATCH;
+        case PCRE_ERROR_NULL:
+            return APR_REG_INVARG;
+        case PCRE_ERROR_BADOPTION:
+            return APR_REG_INVARG;
+        case PCRE_ERROR_BADMAGIC:
+            return APR_REG_INVARG;
+        case PCRE_ERROR_UNKNOWN_NODE:
+            return APR_REG_ASSERT;
+        case PCRE_ERROR_NOMEMORY:
+            return APR_REG_ESPACE;
+#ifdef PCRE_ERROR_MATCHLIMIT
+        case PCRE_ERROR_MATCHLIMIT:
+            return APR_REG_ESPACE;
+#endif
+#ifdef PCRE_ERROR_BADUTF8
+        case PCRE_ERROR_BADUTF8:
+            return APR_REG_INVARG;
+#endif
+#ifdef PCRE_ERROR_BADUTF8_OFFSET
+        case PCRE_ERROR_BADUTF8_OFFSET:
+            return APR_REG_INVARG;
+#endif
+        default:
+            break;
+    }
+    return APR_REG_ASSERT;
+}
+
+/* End of apr_regex.c */
+#else  /* APU_HAVE_PCRE */
+
+/* There is no implementation without PCRE, so fail the build with a
+ * message that says why. */
+#error "apr_regex.c requires PCRE, but APU_HAVE_PCRE is not enabled"
+
+#endif /* ! APU_HAVE_PCRE */
Index: test/Makefile.in
===================================================================
--- test/Makefile.in	(revision 423238)
+++ test/Makefile.in	(working copy)
@@ -93,7 +93,7 @@

 testall_OBJECTS = teststrmatch.lo testuri.lo testuuid.lo abts.lo
testutil.lo \
 	testbuckets.lo testpass.lo testmd4.lo testmd5.lo testldap.lo \
-	testdaterfc.lo testdbd.lo
+	testdaterfc.lo testdbd.lo testregex.lo
 testall_LDADD =  $(TARGET_LIB_PATH)
 testall: $(testall_OBJECTS) $(testall_LDADD)
 	$(LINK) $(APRUTIL_LDFLAGS) $(testall_OBJECTS) $(testall_LDADD)
$(PROGRAM_DEPENDENCIES)
Index: test/testregex.c
===================================================================
--- test/testregex.c	(revision 0)
+++ test/testregex.c	(revision 0)
@@ -0,0 +1,83 @@
+/* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
+ * applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "testutil.h"
+#include "apr_general.h"
+#include "apr_strings.h"
+#include "apr_regex.h"
+
+/* One pattern for runtests() to compile: pattern and options are handed
+ * to apr_regcomp(), and rv is the status that call is compared against. */
+struct regexTest {
+    const char *pattern;
+    int options;
+    apr_status_t rv;
+};
+
+struct regexTest regexTests[] = {
+    { "[A-Za-z]*", 0, APR_SUCCESS }
+};
+
+/* One subject string for runtests() to match against a compiled pattern. */
+struct testCase {
+    int nPattern;        /* index in regexTests[] of the pattern to use */
+    const char *string;  /* subject handed to apr_regexec() */
+    int nmatches;        /* nmatch argument for apr_regexec() */
+    int options;         /* eflags argument for apr_regexec() */
+    int rv;              /* status compared with apr_regexec()'s result */
+};
+
+struct testCase cases[] = {
+    { 0, "abc123",      1, 0, APR_SUCCESS },
+    { 0, "abcdefghijk", 1, 0, APR_SUCCESS },
+    { 0, "123456",      1, 0, APR_REG_NOMATCH }
+};
+
+/* Compile every pattern in regexTests[] and check the status returned;
+ * for each pattern that compiled, run the cases[] entries that refer to
+ * it and check the status apr_regexec() returns, then free the pattern.
+ * The data argument is not used.
+ *
+ * NOTE(review): p is not declared in this file; presumably it is the
+ * shared test pool made visible by testutil.h -- confirm. */
+void runtests(abts_case *tc, void *data)
+{
+    const apr_size_t nTests = sizeof(regexTests) / sizeof(regexTests[0]);
+    const apr_size_t nCases = sizeof(cases) / sizeof(cases[0]);
+    apr_size_t i, j;
+
+    for (i = 0; i < nTests; i++) {
+        struct regexTest *rt = &regexTests[i];
+        apr_regex_t *re = NULL;
+        apr_status_t rv;
+
+        rv = apr_regcomp(&re, rt->pattern, rt->options, p);
+        /* NOTE(review): assumes ABTS_INT_EQUAL takes the expected value
+         * before the actual one -- confirm against abts.h. */
+        ABTS_INT_EQUAL(tc, rt->rv, rv);
+        if (rv != APR_SUCCESS)
+            continue;
+
+        for (j = 0; j < nCases; j++) {
+            struct testCase *t = &cases[j];
+            apr_regmatch_t *matches = NULL;
+
+            /* Skip the cases that belong to another pattern */
+            if (t->nPattern < 0 || (apr_size_t)t->nPattern != i)
+                continue;
+
+            rv = apr_regexec(&matches, re, t->string, t->nmatches,
+                             t->options);
+            ABTS_INT_EQUAL(tc, t->rv, rv);
+        }
+        apr_regfree(re);
+    }
+}
+
+/* Suite entry point for the regex tests: passes the incoming suite
+ * through ADD_SUITE, runs runtests() on the result with no user data and
+ * returns that suite to the caller. */
+abts_suite *testregex(abts_suite *suite)
+{
+    suite = ADD_SUITE(suite);
+
+    abts_run_test(suite, runtests, NULL);
+
+    return suite;
+}
+
Index: test/abts_tests.h
===================================================================
--- test/abts_tests.h	(revision 423238)
+++ test/abts_tests.h	(working copy)
@@ -32,7 +32,8 @@
     {testmd5},
     {testldap},
     {testdbd},
-    {testdaterfc}
+    {testdaterfc},
+    {testregex},
 };

 #endif /* APR_TEST_INCLUDES */
Index: test/testutil.h
===================================================================
--- test/testutil.h	(revision 423238)
+++ test/testutil.h	(working copy)
@@ -53,5 +53,6 @@
 abts_suite *testldap(abts_suite *suite);
 abts_suite *testdbd(abts_suite *suite);
 abts_suite *testdaterfc(abts_suite *suite);
+abts_suite *testregex(abts_suite *suite);

 #endif /* APR_TEST_INCLUDES */
Index: build.conf
===================================================================
--- build.conf	(revision 423238)
+++ build.conf	(working copy)
@@ -21,6 +21,7 @@
   xlate/*.c
   dbd/*.c
   ssl/*.c
+  regex/*.c

 # we have no platform-specific subdirs
 platform_dirs =
Index: configure.in
===================================================================
--- configure.in	(revision 423238)
+++ configure.in	(working copy)
@@ -15,6 +15,7 @@
 sinclude(build/find_apr.m4)
 sinclude(build/dbm.m4)
 sinclude(build/dbd.m4)
+sinclude(build/pcre.m4)
 sinclude(build/ssl.m4)

 dnl Generate ./config.nice for reproducing runs of configure
@@ -160,6 +161,7 @@
 fi

 APU_FIND_SSL
+APU_FIND_PCRE

 so_ext=$APR_SO_EXT
 lib_target=$APR_LIB_TARGET
Index: build/pcre.m4
===================================================================
--- build/pcre.m4	(revision 0)
+++ build/pcre.m4	(revision 0)
@@ -0,0 +1,73 @@
+dnl -------------------------------------------------------- -*- autoconf -*-
+dnl Copyright 2006 The Apache Software Foundation or its licensors, as
+dnl applicable.
+dnl
+dnl Licensed under the Apache License, Version 2.0 (the "License");
+dnl you may not use this file except in compliance with the License.
+dnl You may obtain a copy of the License at
+dnl
+dnl     http://www.apache.org/licenses/LICENSE-2.0
+dnl
+dnl Unless required by applicable law or agreed to in writing, software
+dnl distributed under the License is distributed on an "AS IS" BASIS,
+dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+dnl See the License for the specific language governing permissions and
+dnl limitations under the License.
+
+dnl
+dnl PCRE support
+dnl
+dnl Unless we are asked NOT to search for PCRE by the user
+dnl passing --with-pcre=no to configure, we search the de
+dnl
+
+dnl APU_FIND_PCRE: look for PCRE libraries and headers
+dnl
+dnl Starts with apu_have_pcre=0. When the --with option is given with the
+dnl value "yes", apu_have_pcre is set to 1; when the option is not given
+dnl at all, APU_FIND_LINKEDPCRE is invoked instead. APU_HAVE_PCRE is
+dnl defined only if apu_have_pcre ends up as 1.
+dnl
+dnl NOTE(review): APU_FIND_LINKEDPCRE is not defined in the visible part
+dnl of this patch, and a second AC_DEFUN of APU_FIND_PCRE follows below,
+dnl which presumably replaces this definition -- confirm which is meant.
+dnl NOTE(review): the option is declared as [PCRE] in upper case while the
+dnl help text says --with-pcre -- confirm the resulting option name.
+dnl
+AC_DEFUN([APU_FIND_PCRE], [
+  apu_have_pcre=0
+
+  AC_ARG_WITH([PCRE], [
+    --with-pcre
+  ], [
+    if test "$withval" = "yes"; then
+        apu_have_pcre=1
+
+    fi
+  ], [
+      APU_FIND_LINKEDPCRE
+  ])
+
+  if test "$apu_have_pcre" = "1"; then
+    AC_DEFINE([APU_HAVE_PCRE], 1, [Define that we have libpcre available])
+  fi
+
+])
+dnl
+
+dnl APU_FIND_PCRE: locate PCRE through its pcre-config script
+dnl
+dnl Accepts --with-pcre=PATH. If PATH is a directory holding an executable
+dnl bin/pcre-config, that script is used; otherwise, if PATH itself passes
+dnl test -x, PATH is used as the script; otherwise pcre-config is looked
+dnl up with AC_PATH_PROG. The chosen script must answer --version
+dnl successfully or configure stops with AC_MSG_ERROR. After that
+dnl APU_HAVE_PCRE is defined and the script's --cflags and --libs output
+dnl is added to APRUTIL_CPPFLAGS and APRUTIL_LDFLAGS respectively.
+dnl
+dnl NOTE(review): this is the second AC_DEFUN of APU_FIND_PCRE in this
+dnl file; presumably one of the two was meant to carry another name --
+dnl confirm.
+dnl NOTE(review): no value of --with-pcre appears to skip the check, so a
+dnl missing pcre-config always ends in AC_MSG_ERROR -- confirm that PCRE
+dnl is meant to be mandatory.
+dnl
+AC_DEFUN([APU_FIND_PCRE], [
+  echo $ac_n "Checking for PCRE...${nl}"
+
+  AC_ARG_WITH(pcre, AC_HELP_STRING(--with-pcre=PATH, Path to PCRE library))
+
+  if test -d "$with_pcre" && test -x "$with_pcre/bin/pcre-config"; then
+    PCRE_CONFIG=$with_pcre/bin/pcre-config
+  elif test -x "$with_pcre"; then
+    PCRE_CONFIG=$with_pcre
+  else
+    echo $ac_n "Searching for pcre-config in PATH....${nl}"
+    AC_PATH_PROG(PCRE_CONFIG, pcre-config, false)
+  fi
+
+  if $PCRE_CONFIG --version >/dev/null 2>&1; then :;
+    echo $ac_n "Found ${PCRE_CONFIG}${nl}"
+  else
+    AC_MSG_ERROR([Did not find pcre-config script at ${PCRE_CONFIG}])
+  fi
+
+  AC_DEFINE([APU_HAVE_PCRE], 1, [Define that we have libpcre available])
+  APR_ADDTO(APRUTIL_CPPFLAGS, [`$PCRE_CONFIG --cflags`])
+  APR_ADDTO(APRUTIL_LDFLAGS, [`$PCRE_CONFIG --libs`])
+])
+dnl
+
+


Mime
View raw message