httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jeff Trawick <trawi...@bellsouth.net>
Subject [PATCH] 2nd draft of APR wrapper for iconv
Date Wed, 19 Apr 2000 02:38:09 GMT
Beyond what was discussed on the list earlier today, I made several
additional changes which hopefully are acceptable:

1) in honor of Ryan's new ap_cp_XXX function names, I changed:

   a) the name of the handle type from ap_iconv_t to ap_cp_t
   b) the name of the header file from apr_iconv.h to apr_cp.h
   c) the name of the Unix source file from
      lib/apr/iconv/unix/apr_iconv.c to lib/apr/cp/unix/cp.c

2) I made the choice of whether we build only the stub ("not
   implemented") version less tightly coupled to whether iconv() is
   available (though not by much)

There still is no pod doc, but that will come soon enough.

There is one minor problem I still need to address:

In my current implementation, listed below, we need to know whether or
not we HAVE_ICONV when we compile the APR client app.  This means we
need to include apr_config.h from apr_cp.h.  Bad!  (We have a lot of
symbols which aren't namespace protected.)

Desired solution: 

Always emit prototypes in apr_cp.h but have empty functions in cp.c
when we don't HAVE_ICONV.  These functions would return APR_ENOTIMPL,
of course.

Any problems with this solution?

Thanks for your comments,

Jeff

on with the code:

files off to the attic: lib/apr/include/apr_iconv.h
========================================================
new directories:        lib/apr/cp
                        lib/apr/cp/unix
========================================================
new file lib/apr/cp/unix/cp.c:

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

#include "apr_config.h"

#include "apr_lib.h"
#include "apr_cp.h"

/* apr_cp.h has the logic to decide whether or not translations are
 * implemented; it provides macros for the functions when no implementation
 * is available, so be sure not to generate code here.
 */

#ifdef APR_CP_IMPLEMENTED

#ifdef HAVE_ICONV_H
#include <iconv.h>
#endif

/* Smaller of two values; defined only if no min macro is already in scope.
 * Each argument is evaluated twice, so avoid arguments with side effects.
 */
#ifndef min
#define min(x,y) ((x) <= (y) ? (x) : (y))
#endif

/* Conversion handle returned by ap_cp_open().
 *
 * Exactly one of the two translation mechanisms is active at a time:
 * either the iconv descriptor, or (after check_sbcs() has found the
 * conversion to be a plain byte-for-byte mapping) the lookup table.
 */
struct ap_cp_t {
    ap_pool_t *pool;    /* pool the handle and its strings/table come from */
    char *frompage;     /* copy of the source codepage name */
    char *topage;       /* copy of the target codepage name */
    char *sbcs_table;   /* 256-entry byte translation table filled in by
                         * check_sbcs(); indexed by the input byte */
#ifdef HAVE_ICONV
    iconv_t ich;        /* iconv descriptor; (iconv_t)-1 once it has been
                         * closed in favor of sbcs_table */
#endif
};

/* get_default_codepage()
 *
 * Guess the codepage the source code was compiled in, so that string
 * literals in the source (e.g., "GET /\r\n") can be translated properly.
 *
 * The guess is made by comparing the compile-time values of a few
 * character constants with their known code points in candidate
 * codepages.  On MVS the compiler-provided __CODESET__ is used when
 * available.  A configure-time symbol could replace this heuristic where
 * appropriate; on EBCDIC platforms the result depends on how the source
 * was unpacked.
 *
 * Returns a static string naming the codepage, or "unknown".
 */

static const char *get_default_codepage(void)
{
#if defined(__MVS__)
#if defined(__CODESET__)
    return __CODESET__;
#else
    return "IBM-1047";
#endif
#else
    /* the tests are ordered: the first matching probe wins */
    return ('}' == 0xD0) ? "IBM-1047"
         : ('{' == 0xFB) ? "EDF04"
         : ('A' == 0xC1) ? "EBCDIC"      /* not useful */
         : ('A' == 0x41) ? "ISO8859-1"   /* not necessarily true */
         : "unknown";
#endif
}

/* ap_cp_cleanup()
 *
 * Pool cleanup callback for a conversion handle.  'convset' is the
 * ap_cp_t that was registered with the pool.  Closes the iconv
 * descriptor if one is still open.
 *
 * Returns APR_SUCCESS, or the errno left by a failing iconv_close().
 */
static ap_status_t ap_cp_cleanup(void *convset)
{
    ap_status_t rv = APR_SUCCESS;
#ifdef HAVE_ICONV
    ap_cp_t *cp = convset;

    /* (iconv_t)-1 marks a handle whose descriptor is not open */
    if (cp->ich != (iconv_t)-1 && iconv_close(cp->ich)) {
        rv = errno;
    }
#endif
    return rv;
}

#ifdef HAVE_ICONV
/* check_sbcs()
 *
 * Probe whether the conversion behind convset->ich is a simple
 * single-byte mapping by translating all 256 byte values in one call.
 *
 * If every input byte is consumed and exactly 256 output bytes are
 * produced, the output is saved as convset->sbcs_table, the iconv
 * descriptor is closed and convset->ich is set to (iconv_t)-1 so that
 * later conversions use the table.
 *
 * Otherwise the descriptor is kept, and it is reset to its initial
 * shift state so that the probe data does not affect later conversions.
 */
static void check_sbcs(ap_cp_t *convset)
{
    char inbuf[256], outbuf[256];
    char *inbufptr = inbuf, *outbufptr = outbuf;
    size_t inbytes_left, outbytes_left;
    size_t i;
    size_t translated;

    for (i = 0; i < sizeof(inbuf); i++) {
        inbuf[i] = (char)i;
    }

    inbytes_left = outbytes_left = sizeof(inbuf);
    translated = iconv(convset->ich, (const char **)&inbufptr, 
                       &inbytes_left, &outbufptr, &outbytes_left);
    if (translated != (size_t) -1 &&
        inbytes_left == 0 &&
        outbytes_left == 0) {
        /* hurray... this is simple translation; save the table,
         * close the iconv descriptor
         */
        char *table = ap_palloc(convset->pool, sizeof(outbuf));

        if (table) {
            memcpy(table, outbuf, sizeof(outbuf));
            convset->sbcs_table = table;
            iconv_close(convset->ich);
            convset->ich = (iconv_t)-1;

            /* TODO: add the table to the cache */
            return;
        }
        /* no memory for the table: fall through and keep using iconv */
    }

    /* The descriptor stays in use.  The probe may have left it in a
     * non-initial shift state (or stopped in the middle of a sequence),
     * so put it back into its initial state.
     */
    iconv(convset->ich, NULL, NULL, NULL, NULL);
}
#endif /* HAVE_ICONV */

/* ap_cp_open()
 *
 * Create a conversion handle which translates from codepage 'frompage'
 * to codepage 'topage'.  A NULL name for either one selects the result
 * of get_default_codepage().
 *
 * The handle, and copies of both codepage names, are allocated from
 * 'pool'; a cleanup for the handle is registered on that pool.
 *
 * On success *convset points to the new handle and APR_SUCCESS is
 * returned.  On failure *convset is NULL and the result is APR_ENOMEM
 * (allocation failed), the errno left by iconv_open(), or EINVAL when no
 * converter was found for the pair.
 */
ap_status_t ap_cp_open(ap_cp_t **convset, const char *topage,
                       const char *frompage, ap_pool_t *pool)
{
    ap_status_t status;
    ap_cp_t *new;
    int found = 0;

    *convset = NULL;
    
    if (!topage) {
        topage = get_default_codepage();
    }

    if (!frompage) {
        frompage = get_default_codepage();
    }
    
    new = ap_palloc(pool, sizeof(*new));
    if (!new) {
        return APR_ENOMEM;
    }

    /* Give every field a defined value up front; the table and the
     * descriptor are otherwise only set on some paths below.
     */
    new->pool = pool;
    new->sbcs_table = NULL;
#ifdef HAVE_ICONV
    new->ich = (iconv_t)-1;
#endif
    new->topage = ap_pstrdup(pool, topage);
    new->frompage = ap_pstrdup(pool, frompage);
    if (!new->topage || !new->frompage) {
        return APR_ENOMEM;
    }

#ifdef TODO
    /* search cache of codepage pairs; we may be able to avoid the
     * expensive iconv_open()
     */

    set found to non-zero if found in the cache
#endif

#ifdef HAVE_ICONV
    if (!found) {
        new->ich = iconv_open(topage, frompage);
        if (new->ich == (iconv_t)-1) {
            return errno;
        }
        found = 1;
        /* switch to a lookup table if this is a simple byte mapping */
        check_sbcs(new);
    }
#endif /* HAVE_ICONV */

    if (found) {
        *convset = new;
        ap_register_cleanup(pool, (void *)new, ap_cp_cleanup,
                            ap_null_cleanup);
        status = APR_SUCCESS;
    }
    else {
        status = EINVAL; /* same as what iconv() would return if it
                            couldn't handle the pair */
    }
    
    return status;
}

/* ap_cp_conv_buffer()
 *
 * Translate bytes from 'inbuf' into 'outbuf' using 'convset'.
 *
 * On entry *inbytes_left is the number of input bytes available and
 * *outbytes_left the space available in 'outbuf'.  On return both have
 * been reduced by the amount consumed/produced.
 *
 * When the handle still has an open iconv descriptor the work is done by
 * iconv(); if iconv() fails, the errno it left is returned.  Otherwise
 * the handle's single-byte table is used: min(*inbytes_left,
 * *outbytes_left) bytes are mapped one-for-one.  The table path reads
 * each input byte before writing the corresponding output byte, so
 * 'inbuf' and 'outbuf' may be the same buffer on that path.
 *
 * Returns APR_SUCCESS unless iconv() reported an error.
 */
ap_status_t ap_cp_conv_buffer(ap_cp_t *convset, const char *inbuf,
                              ap_size_t *inbytes_left, char *outbuf,
                              ap_size_t *outbytes_left)
{
    ap_status_t status = APR_SUCCESS;
#ifdef HAVE_ICONV
    size_t translated;

    if (convset->ich != (iconv_t)-1) {
        char *inbufptr = (char *)inbuf;
        char *outbufptr = outbuf;
        
        translated = iconv(convset->ich, (const char **)&inbufptr, 
                           inbytes_left, &outbufptr, outbytes_left);
        if (translated == (size_t)-1) {
            return errno;
        }
    }
    else
#endif
    {
        /* keep the count in ap_size_t; an int would truncate large
         * buffer lengths
         */
        ap_size_t to_convert = min(*inbytes_left, *outbytes_left);
        const char *table = convset->sbcs_table;
        ap_size_t i;

        for (i = 0; i < to_convert; i++) {
            outbuf[i] = table[(unsigned char)inbuf[i]];
        }
        *inbytes_left -= to_convert;
        *outbytes_left -= to_convert;
    }

    return status;
}

/* ap_cp_close()
 *
 * Release the resources of a conversion handle now rather than when its
 * pool is cleaned up.  Runs the handle's cleanup; only if that succeeds
 * is the cleanup registration removed from the pool.
 *
 * Returns APR_SUCCESS or the status reported by the cleanup.
 */
ap_status_t ap_cp_close(ap_cp_t *convset)
{
    ap_status_t rv = ap_cp_cleanup(convset);

    if (rv != APR_SUCCESS) {
        return rv;
    }

    ap_kill_cleanup(convset->pool, convset, ap_cp_cleanup);
    return rv;
}

#endif /* APR_CP_IMPLEMENTED */

========================================================
new file lib/apr/cp/unix/Makefile.in


# Tool and flag settings.  The @NAME@ placeholders are presumably filled in
# when configure generates Makefile from this template.
RM=@RM@
CC=@CC@
RANLIB=@RANLIB@
CFLAGS=@CFLAGS@ @OPTIM@
LIBS=@LIBS@
LDFLAGS=@LDFLAGS@ $(LIBS)
# APR public headers live two levels up from lib/apr/cp/unix.
INCDIR=../../include
INCLUDES=-I$(INCDIR) -I.

# Objects built in this directory.
OBJS=cp.o

# Compile each .c file to a .o file.
.c.o:
	$(CC) $(CFLAGS) -c $(INCLUDES) $<

# Default target: build the objects only; nothing is archived or linked here.
all: $(OBJS)

clean:
	$(RM) -f *.o *.a *.so

# Also remove the generated Makefile.
distclean: clean
	-$(RM) -f Makefile

# Rebuild the objects whenever the Makefile itself changes.
$(OBJS): Makefile

#
# We really don't expect end users to use this rule.  It works only with
# gcc, and rebuilds Makefile.in (the previous version is saved as
# Makefile.in.bak).  You have to re-run Configure after using it.
#
# Everything up to the marker line below is kept, fresh "gcc -MM" output
# is appended after it, and the literal include directory in that output
# is rewritten back to $(INCDIR).
# NOTE(review): OSDIR is not set anywhere in this Makefile, so the second
# sed expression looks like a leftover from another directory -- confirm.
#
depend:
	cp Makefile.in Makefile.in.bak \
	    && sed -ne '1,/^# DO NOT REMOVE/p' Makefile.in > Makefile.new \
	    && gcc -MM $(INCLUDES) $(CFLAGS) *.c >> Makefile.new \
	    && sed -e '1,$$s: $(INCDIR)/: $$(INCDIR)/:g' \
	           -e '1,$$s: $(OSDIR)/: $$(OSDIR)/:g' Makefile.new \
		> Makefile.in \
	    && rm Makefile.new

# DO NOT REMOVE
cp.o: cp.c $(INCDIR)/apr_config.h $(INCDIR)/apr_lib.h \
 $(INCDIR)/apr_general.h $(INCDIR)/apr.h \
 $(INCDIR)/apr_errno.h $(INCDIR)/apr_file_io.h \
 $(INCDIR)/apr_time.h $(INCDIR)/apr_thread_proc.h \
 $(INCDIR)/apr_cp.h

================================================
new file lib/apr/include/apr_cp.h

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/* apr_cp.h
 *
 * Interface for translating data between codepages.  When no
 * implementation is available (APR_CP_IMPLEMENTED is not defined) the
 * functions are replaced by macros which evaluate to APR_ENOTIMPL.
 */
#ifndef APR_CP_H
#define APR_CP_H

#include "apr_general.h"
#include "apr_time.h"
#include "apr_errno.h"
#include "apr_config.h" /* bad! this will be seen by clients */

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Translation is implemented only when iconv() is available. */
#if defined(HAVE_ICONV)
#define APR_CP_IMPLEMENTED
#endif
    
#if !defined(APR_CP_IMPLEMENTED)

/* No handle structure exists in this configuration. */
typedef void                         ap_cp_t;

/* For platforms where we don't bother with translating between codepages, 
 * these are macros which always return failure.  The macro arguments are
 * discarded without being evaluated.
 */

#define ap_cp_open(convset, topage, frompage, pool) APR_ENOTIMPL

#define ap_cp_conv_buffer(convset, inbuf, inbytes_left, outbuf, \
                          outbytes_left) APR_ENOTIMPL

/* The purpose of ap_cp_conv_char is to translate one character
 * at a time.  This needs to be written carefully so that it works
 * with double-byte character sets. 
 */
#define ap_cp_conv_char(convset, inchar, outchar) APR_ENOTIMPL

#define ap_cp_close(convset) APR_ENOTIMPL

#else  /* APR_CP_IMPLEMENTED */

/* Opaque conversion handle; the structure is private to the
 * implementation.
 */
typedef struct ap_cp_t            ap_cp_t;

/* Create a conversion handle translating from codepage 'frompage' to
 * codepage 'topage'; the handle is allocated from 'pool' and stored in
 * *convset.  Passing NULL (APR_DEFAULT_CODEPAGE) for a codepage name
 * selects the implementation's guess at the codepage of the source code.
 * Returns APR_SUCCESS, or an error status with *convset set to NULL.
 */
ap_status_t ap_cp_open(ap_cp_t **convset, const char *topage, 
                       const char *frompage, ap_pool_t *pool);
    
/* Translate from 'inbuf' to 'outbuf'.  On entry *inbytes_left is the
 * number of input bytes and *outbytes_left the space in 'outbuf'; on
 * return both have been reduced by the amount processed.  Callers
 * should check both counters as well as the status to find out whether
 * everything was translated.
 */
ap_status_t ap_cp_conv_buffer(ap_cp_t *convset, const char *inbuf, 
                              ap_size_t *inbytes_left, char *outbuf,
                              ap_size_t *outbytes_left);

/* Codepage name which asks ap_cp_open() to use its default.
 * NOTE(review): only defined when APR_CP_IMPLEMENTED is defined.
 */
#define APR_DEFAULT_CODEPAGE NULL

/* The purpose of ap_cp_conv_char is to translate one character
 * at a time.  This needs to be written carefully so that it works
 * with double-byte character sets. 
 *
 * NOTE(review): 'outchar' is passed by value, so this signature has no
 * way to hand the translated character back; it presumably needs to be
 * a pointer.  No definition of this function accompanies the prototype
 * in the Unix cp.c -- confirm before relying on it.
 */
ap_status_t ap_cp_conv_char(ap_cp_t *convset, char inchar, char outchar);

/* Release the resources held by 'convset' without waiting for its pool
 * to be cleaned up.  Returns APR_SUCCESS or the error status from
 * closing the underlying converter.
 */
ap_status_t ap_cp_close(ap_cp_t *convset);

#endif  /* APR_CP_IMPLEMENTED */

#ifdef __cplusplus
}
#endif

#endif  /* ! APR_CP_H */

Index: src/lib/apr/configure.in
===================================================================
RCS file: /cvs/apache/apache-2.0/src/lib/apr/configure.in,v
retrieving revision 1.71
diff -u -r1.71 configure.in
--- configure.in        2000/04/15 19:05:12     1.71
+++ configure.in        2000/04/19 02:39:27
@@ -15,7 +15,7 @@
 # These added to allow default directories to be used...
 DEFAULT_OSDIR="unix"
 echo "(Default will be ${DEFAULT_OSDIR})"
-MODULES="file_io network_io threadproc misc locks time mmap shmem dso"
+MODULES="file_io network_io threadproc misc locks time mmap shmem dso cp"
 
 dnl Process this file with autoconf to produce a configure script.
 AC_INIT(configure.in)
@@ -124,6 +124,7 @@
 AC_CHECK_FUNC(inet_network, [ inet_network="1" ], [ inet_network="0" ])
 AC_CHECK_FUNC(_getch)
 AC_CHECK_FUNCS(gmtime_r localtime_r)
+AC_CHECK_FUNCS(iconv)
 AC_SUBST(sendfile)
 AC_SUBST(fork)
 AC_SUBST(inet_addr)
@@ -176,6 +177,7 @@
 AC_CHECK_HEADERS(arpa/inet.h)
 AC_CHECK_HEADERS(netinet/in.h, netinet_inh="1", netinet_inh="0")
 AC_CHECK_HEADERS(netinet/tcp.h)
+AC_CHECK_HEADERS(iconv.h)
 
 AC_CHECK_HEADERS(sys/file.h)
 AC_CHECK_HEADERS(sys/ioctl.h)   

Index: src/lib/apr/test/ab_apr.c
===================================================================
RCS file: /cvs/apache/apache-2.0/src/lib/apr/test/ab_apr.c,v
retrieving revision 1.24
diff -u -r1.24 ab_apr.c
--- ab_apr.c	2000/04/17 03:39:06	1.24
+++ ab_apr.c	2000/04/19 02:39:30
@@ -97,6 +97,14 @@
 
 /*  -------------------------------------------------------------------- */
 
+#if 'A' != 0x41
+/* Hmmm... This source code isn't being compiled in ASCII.
+ * In order for data that flows over the network to make
+ * sense, we need to translate to/from ASCII.
+ */
+#define NOT_ASCII
+#endif
+
 /* affects include files on Solaris */
 #define BSD_COMP
 
@@ -104,6 +112,9 @@
 #include "apr_file_io.h"
 #include "apr_time.h"
 #include "apr_getopt.h"
+#ifdef NOT_ASCII
+#include "apr_cp.h"
+#endif
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -193,6 +204,9 @@
 ap_pool_t *cntxt;
 
 ap_pollfd_t *readbits;
+#ifdef NOT_ASCII
+ap_cp_t *fromascii, *toascii;
+#endif
 
 /* --------------------------------------------------------- */
 
@@ -538,11 +552,19 @@
         int l = 4;
         int space = CBUFFSIZE - c->cbx - 1;	/* -1 to allow for 0 terminator */
         int tocopy = (space < r) ? space : r;
-#ifndef CHARSET_EBCDIC
+#ifdef NOT_ASCII
+        ap_size_t inbytes_left = space, outbytes_left = space;
+        
+        status = ap_cp_conv_buffer(fromascii, buffer, &inbytes_left,
+                                   c->cbuff + c->cbx, &outbytes_left);
+        if (status || inbytes_left || outbytes_left) {
+            fprintf(stderr, "only simple translation is supported (%d/%u/%u)\n",
+                    status, inbytes_left, outbytes_left);
+            exit(1);
+        }
+#else
         memcpy(c->cbuff + c->cbx, buffer, space);
-#else /*CHARSET_EBCDIC */
-        ascii2ebcdic(c->cbuff + c->cbx, buffer, space);
-#endif /*CHARSET_EBCDIC */
+#endif /*NOT_ASCII */
         c->cbx += tocopy;
         space -= tocopy;
         c->cbuff[c->cbx] = 0;	/* terminate for benefit of strstr */
@@ -671,6 +693,10 @@
     ap_interval_time_t timeout;
     ap_int16_t rv;
     int i;
+#ifdef NOT_ASCII
+    ap_status_t status;
+    ap_size_t inbytes_left, outbytes_left;
+#endif
 
     if (!use_html) {
         printf("Benchmarking %s (be patient)...", hostname);
@@ -719,9 +745,16 @@
 
     reqlen = strlen(request);
 
-#ifdef CHARSET_EBCDIC
-    ebcdic2ascii(request, request, reqlen);
-#endif /*CHARSET_EBCDIC */
+#ifdef NOT_ASCII
+    inbytes_left = outbytes_left = reqlen;
+    status = ap_cp_conv_buffer(toascii, request, &inbytes_left,
+                               request, &outbytes_left);
+    if (status || inbytes_left || outbytes_left) {
+        fprintf(stderr, "only simple translation is supported (%d/%u/%u)\n",
+                status, inbytes_left, outbytes_left);
+        exit(1);
+    }
+#endif /*NOT_ASCII */
 
     /* ok - lets start */
     start = ap_now();
@@ -886,6 +919,9 @@
 int main(int argc, char **argv)
 {
     int c, r;
+#ifdef NOT_ASCII
+    ap_status_t status;
+#endif
 
     /* ap_table_t defaults  */
     tablestring = "";
@@ -896,6 +932,19 @@
     atexit(ap_terminate);
     ap_create_pool(&cntxt, NULL);
 
+#ifdef NOT_ASCII
+    status = ap_cp_open(&toascii, "ISO8859-1", APR_DEFAULT_CODEPAGE, cntxt);
+    if (status) {
+        fprintf(stderr, "ap_codepage_open(to ASCII)->%d\n", status);
+        exit(1);
+    }
+    status = ap_cp_open(&fromascii, APR_DEFAULT_CODEPAGE, "ISO8859-1", cntxt);
+    if (status) {
+        fprintf(stderr, "ap_codepage_open(from ASCII)->%d\n", status);
+        exit(1);
+    }
+#endif
+    
     ap_optind = 1;
     while (ap_getopt(argc, argv, "n:c:t:T:p:v:kVhwx:y:z:", &c, cntxt) == APR_SUCCESS)
{
         switch (c) {

-- 
Jeff Trawick | trawick@ibm.net | PGP public key at web site:
     http://www.geocities.com/SiliconValley/Park/9289/
          Born in Roswell... married an alien...

Mime
View raw message