httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "William A. Rowe, Jr." <wr...@rowe-clan.net>
Subject RE: [addt'n] Unicode URL encoding
Date Thu, 05 Oct 2000 18:33:07 GMT
So...

are you suggesting I commit the utf8<->ucs2 translation, but hook it 
into the existing apr_xlate lingo?  Just checking :-)

Bill

> -----Original Message-----
> From: Jeff Trawick [mailto:trawickj@bellsouth.net]
> Sent: Thursday, October 05, 2000 1:12 PM
> To: new-httpd@apache.org
> Subject: Re: [addt'n] Unicode URL encoding
> 
> 
> "William A. Rowe, Jr." <wrowe@rowe-clan.net> writes:
> 
> > Index: src/lib/apr/include/apr_xlate.h
> > ===================================================================
> > RCS file: /home/cvs/apache-2.0/src/lib/apr/include/apr_xlate.h,v
> > retrieving revision 1.7
> > diff -u -r1.7 apr_xlate.h
> > --- src/lib/apr/include/apr_xlate.h	2000/08/06 06:07:10	1.7
> > +++ src/lib/apr/include/apr_xlate.h	2000/10/05 16:32:18
> > @@ -184,6 +184,28 @@
> >  
> >  #endif  /* ! APR_HAS_XLATE */
> >  
> > +
> > +/**
> > + * Fast ucs2 to utf8 conversion
> > + * Since it is assumed that platforms that support Unicode 
> are using
> > + * ucs2, and the portable network application still lives 
> in byte chars,
> > + * this implementation will quickly make the trip back and 
> forth for
> > + * file system calls.  Even if it is not supported by the 
> file system,
> > + * and is implemented using multiple characters (of codes 128-255)
> > + * it is still worthwhile verifying the string is valid by 
> passing it
> > + * through apr_ucs2_from_utf8.
> > + *
> > + * This was created specifically with RFC 2718 2.2.5 i18n 
> URIs in mind.
> > + *
> > + * @param convset The codepage translation handle to close
> > + * @retval Pointer to invalid source character, or NULL if 
> no error.
> > + */
> > +APR_EXPORT(const char*) apr_ucs2_from_utf8(apr_wchar_t 
> *out, const char *in);
> > +
> > +APR_EXPORT(const apr_wchar_t*) apr_utf8_from_ucs2(char 
> *in, const apr_wchar_t *out);
> > +
> > +
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> 
> I would suggest a different API for this -- the one we already have.
> 
> For my own testing purposes, I integrated some custom translation
> logic into apr_xlate, as shown below...  The mechanics of the
> translation I added are not complete (just good enough to test some
> interesting cases on my laptop).  Also, there are better ways to
> integrate it (like storing a function pointer instead of using the
> goofy builtin_to16 and builtin_from16 flags).
> 
> The reason I hacked this code in probably applies to your translation:
> not all iconv() implementations are created equal, and I often use one
> (not-new-enough glibc) that didn't do the translation I wanted.  I
> didn't want to change mod_charset_lite to use more than one API, so I
> changed APR.
> 
> The way this hardcoded support was added^H^H^H^H^Hhacked in, iconv
> support is not required on the platform (subject to a buglet or two).
> 
> Index: lib/apr/i18n/unix/xlate.c
> ===================================================================
> RCS file: /home/cvspublic/apache-2.0/src/lib/apr/i18n/unix/xlate.c,v
> retrieving revision 1.12
> diff -u -r1.12 xlate.c
> --- lib/apr/i18n/unix/xlate.c	2000/08/20 04:14:49	1.12
> +++ lib/apr/i18n/unix/xlate.c	2000/10/05 18:05:54
> @@ -80,6 +80,8 @@
>      char *frompage;
>      char *topage;
>      char *sbcs_table;
> +    int builtin_to16;
> +    int builtin_from16;
>  #ifdef HAVE_ICONV
>      iconv_t ich;
>  #endif
> @@ -233,9 +235,24 @@
>       * expensive iconv_open()
>       */
>  
> -    set found to non-zero if found in the cache
> +    /* set found to non-zero if found in the cache */
>  #endif
>  
> +    if (!strcmp(frompage, "ISO-8859-1") &&
> +        !strcmp(topage,   "UTS-16"))
> +    {
> +        found = 1;
> +        new->builtin_to16 = 1;
> +        new->ich = (iconv_t)-1;
> +    }
> +    else if (!strcmp(topage,     "ISO-8859-1") &&
> +             !strcmp(frompage,   "UTS-16"))
> +    {
> +        found = 1;
> +        new->builtin_from16 = 1;
> +        new->ich = (iconv_t)-1;
> +    }
> +
>  #ifdef HAVE_ICONV
>      if (!found) {
>          new->ich = iconv_open(topage, frompage);
> @@ -267,6 +284,52 @@
>      return APR_SUCCESS;
>  } 
>  
> +static apr_status_t apr_xlate_conv_16_to_8859_1(apr_xlate_t 
> *convset, const char *inbuf,
> +                                                apr_size_t 
> *inbytes_left, char *outbuf,
> +                                                apr_size_t 
> *outbytes_left)
> +{
> +    apr_status_t rv;
> +
> +    while (*inbytes_left >= 2 && *outbytes_left) {
> +        ++inbuf; /* skip over 0x00 */
> +        *outbuf = *inbuf;
> +        ++outbuf;
> +        ++inbuf;
> +        *inbytes_left  -= 2;
> +        *outbytes_left -= 1;
> +    }
> +
> +    if (*inbytes_left == 1 && *outbytes_left >= 1) {
> +        rv = APR_INCOMPLETE;
> +    }
> +    else {
> +        rv = 0;
> +    }
> +
> +    return rv;
> +}
> +
> +static apr_status_t apr_xlate_conv_8859_1_to_16(apr_xlate_t 
> *convset, const char *inbuf,
> +                                                apr_size_t 
> *inbytes_left, char *outbuf,
> +                                                apr_size_t 
> *outbytes_left)
> +{
> +    apr_status_t rv;
> +
> +    while (*inbytes_left && *outbytes_left >= 2) {
> +        *outbuf = '\0';
> +        ++outbuf;
> +        *outbuf = *inbuf;
> +        ++outbuf;
> +        ++inbuf;
> +        --*inbytes_left;
> +        *outbytes_left -= 2;
> +    }
> +
> +    rv = 0;
> +
> +    return rv;
> +}
> +
>  apr_status_t apr_xlate_conv_buffer(apr_xlate_t *convset, 
> const char *inbuf,
>                                     apr_size_t *inbytes_left, 
> char *outbuf,
>                                     apr_size_t *outbytes_left)
> @@ -274,7 +337,19 @@
>      apr_status_t status = APR_SUCCESS;
>  #ifdef HAVE_ICONV
>      size_t translated;
> +#endif
> +
> 
> better way to do this:
> 
>      if (convset->builtin) {
> 	return convset->builtin(convset, inbuf, inbytes_left, outbuf, 
> 	                        outbytes_left);
>      }
> 
> +    if (convset->builtin_to16) {
> +        return apr_xlate_conv_8859_1_to_16(convset, inbuf, 
> inbytes_left,
> +                                           outbuf, outbytes_left);
> +    }
>  
> +    if (convset->builtin_from16) {
> +        return apr_xlate_conv_16_to_8859_1(convset, inbuf, 
> inbytes_left,
> +                                           outbuf, outbytes_left);
> +    }
> +
> +#ifdef HAVE_ICONV
>      if (convset->ich != (iconv_t)-1) {
>          const char *inbufptr = inbuf;
>          char *outbufptr = outbuf;
> 
> -- 
> Jeff Trawick | trawick@ibm.net | PGP public key at web site:
>      http://www.geocities.com/SiliconValley/Park/9289/
>           Born in Roswell... married an alien...
> 

Mime
View raw message