httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jeff Trawick <trawi...@bellsouth.net>
Subject Re: [addt'n] Unicode URL encoding
Date Thu, 05 Oct 2000 18:11:50 GMT
"William A. Rowe, Jr." <wrowe@rowe-clan.net> writes:

> Index: src/lib/apr/include/apr_xlate.h
> ===================================================================
> RCS file: /home/cvs/apache-2.0/src/lib/apr/include/apr_xlate.h,v
> retrieving revision 1.7
> diff -u -r1.7 apr_xlate.h
> --- src/lib/apr/include/apr_xlate.h	2000/08/06 06:07:10	1.7
> +++ src/lib/apr/include/apr_xlate.h	2000/10/05 16:32:18
> @@ -184,6 +184,28 @@
>  
>  #endif  /* ! APR_HAS_XLATE */
>  
> +
> +/**
> + * Fast ucs2 to utf8 conversion
> + * Since it is assumed that platforms that support Unicode are using
> + * ucs2, and the portable network application still lives in byte chars,
> + * this implementation will quickly make the trip back and forth for
> + * file system calls.  Even if it is not supported by the file system,
> + * and is implemented using multiple characters (of codes 128-255)
> + * it is still worthwhile verifying the string is valid by passing it
> + * through apr_ucs2_from_utf8.
> + *
> + * This was created specifically with RFC 2718 2.2.5 i18n URIs in mind.
> + *
> + * @param convset The codepage translation handle to close
> + * @retval Pointer to invalid source character, or NULL if no error.
> + */
> +APR_EXPORT(const char*) apr_ucs2_from_utf8(apr_wchar_t *out, const char *in);
> +
> +APR_EXPORT(const apr_wchar_t*) apr_utf8_from_ucs2(char *in, const apr_wchar_t *out);
> +
> +
> +
>  #ifdef __cplusplus
>  }
>  #endif

I would suggest a different API for this -- the one we already have.

For my own testing purposes, I integrated some custom translation
logic into apr_xlate, as shown below...  The mechanics of the
translation I added are not complete (just good enough to test some
interesting cases on my laptop).  Also, there are better ways to
integrate it (like storing a function pointer instead of using the
goofy builtin_to16 and builtin_from16 flags).

The reason I hacked this code in probably applies to your translation:
not all iconv() implementations are created equal, and I often use one
(not-new-enough glibc) that doesn't do the translation I want.  I
didn't want to change mod_charset_lite to use more than one API, so I
changed APR.

The way this hardcoded support was added^H^H^H^H^Hhacked in, iconv
support is not required on the platform (subject to a buglet or two).

Index: lib/apr/i18n/unix/xlate.c
===================================================================
RCS file: /home/cvspublic/apache-2.0/src/lib/apr/i18n/unix/xlate.c,v
retrieving revision 1.12
diff -u -r1.12 xlate.c
--- lib/apr/i18n/unix/xlate.c	2000/08/20 04:14:49	1.12
+++ lib/apr/i18n/unix/xlate.c	2000/10/05 18:05:54
@@ -80,6 +80,8 @@
     char *frompage;
     char *topage;
     char *sbcs_table;
+    int builtin_to16;
+    int builtin_from16;
 #ifdef HAVE_ICONV
     iconv_t ich;
 #endif
@@ -233,9 +235,24 @@
      * expensive iconv_open()
      */
 
-    set found to non-zero if found in the cache
+    /* set found to non-zero if found in the cache */
 #endif
 
+    if (!strcmp(frompage, "ISO-8859-1") &&
+        !strcmp(topage,   "UTS-16"))
+    {
+        found = 1;
+        new->builtin_to16 = 1;
+        new->ich = (iconv_t)-1;
+    }
+    else if (!strcmp(topage,     "ISO-8859-1") &&
+             !strcmp(frompage,   "UTS-16"))
+    {
+        found = 1;
+        new->builtin_from16 = 1;
+        new->ich = (iconv_t)-1;
+    }
+
 #ifdef HAVE_ICONV
     if (!found) {
         new->ich = iconv_open(topage, frompage);
@@ -267,6 +284,52 @@
     return APR_SUCCESS;
 } 
 
+static apr_status_t apr_xlate_conv_16_to_8859_1(apr_xlate_t *convset, const char *inbuf,
+                                                apr_size_t *inbytes_left, char *outbuf,
+                                                apr_size_t *outbytes_left)
+{
+    apr_status_t rv;
+
+    while (*inbytes_left >= 2 && *outbytes_left) {
+        ++inbuf; /* skip over 0x00 */
+        *outbuf = *inbuf;
+        ++outbuf;
+        ++inbuf;
+        *inbytes_left  -= 2;
+        *outbytes_left -= 1;
+    }
+
+    if (*inbytes_left == 1 && *outbytes_left >= 1) {
+        rv = APR_INCOMPLETE;
+    }
+    else {
+        rv = 0;
+    }
+
+    return rv;
+}
+
+static apr_status_t apr_xlate_conv_8859_1_to_16(apr_xlate_t *convset, const char *inbuf,
+                                                apr_size_t *inbytes_left, char *outbuf,
+                                                apr_size_t *outbytes_left)
+{
+    apr_status_t rv;
+
+    while (*inbytes_left && *outbytes_left >= 2) {
+        *outbuf = '\0';
+        ++outbuf;
+        *outbuf = *inbuf;
+        ++outbuf;
+        ++inbuf;
+        --*inbytes_left;
+        *outbytes_left -= 2;
+    }
+
+    rv = 0;
+
+    return rv;
+}
+
 apr_status_t apr_xlate_conv_buffer(apr_xlate_t *convset, const char *inbuf,
                                    apr_size_t *inbytes_left, char *outbuf,
                                    apr_size_t *outbytes_left)
@@ -274,7 +337,19 @@
     apr_status_t status = APR_SUCCESS;
 #ifdef HAVE_ICONV
     size_t translated;
+#endif
+

better way to do this:

     if (convset->builtin) {
	return convset->builtin(convset, inbuf, inbytes_left, outbuf, 
	                        outbytes_left);
     }

+    if (convset->builtin_to16) {
+        return apr_xlate_conv_8859_1_to_16(convset, inbuf, inbytes_left,
+                                           outbuf, outbytes_left);
+    }
 
+    if (convset->builtin_from16) {
+        return apr_xlate_conv_16_to_8859_1(convset, inbuf, inbytes_left,
+                                           outbuf, outbytes_left);
+    }
+
+#ifdef HAVE_ICONV
     if (convset->ich != (iconv_t)-1) {
         const char *inbufptr = inbuf;
         char *outbufptr = outbuf;

-- 
Jeff Trawick | trawick@ibm.net | PGP public key at web site:
     http://www.geocities.com/SiliconValley/Park/9289/
          Born in Roswell... married an alien...

Mime
View raw message