Message-Id: <19981119121647.25485.qmail@hyperreal.org>
Date: 19 Nov 1998 12:16:47 -0000
From: Philipp Knirsch <phil@linux.de>
Reply-To: phil@linux.de
To: apbugs@hyperreal.org
Subject: protocol/3422: main/util.c: ap_uuencode and ap_uudecode memory leaks
 and wrong output
Sender: apache-bugdb-owner@apache.org
Precedence: bulk


>Number:         3422
>Category:       protocol
>Synopsis:       main/util.c: ap_uuencode and ap_uudecode memory leaks and wrong output
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    apache
>State:          open
>Class:          sw-bug
>Submitter-Id:   apache
>Arrival-Date:   Thu Nov 19 04:20:00 PST 1998
>Last-Modified:
>Originator:     phil@linux.de
>Organization:
apache
>Release:        1.3.3
>Environment:
Linux kleo 2.0.35 #136 Mon Jul 27 11:27:51 MEST 1998 i586 unknown
although error occurs on any system.
>Description:
The problem is mainly off reading char * strings beyond their end and in the
ap_uuencode of writing before the start of the string in case of a empty string
as input. Also the output of ap_uuencode is only correct for strings of length
3n + 1.
>How-To-Repeat:
It is easy repeatable by writing a very simply test application which does the
following:

main()
{
    char *s1 = "abc";
    char *s2 = ap_uuencode(s1);
    char *s3 = ap_uudecode(s2);

    printf("%s\n%s\n%s\n", s1, s2, s3);
}

The last line should read "abc" but as in the original source code the last 2
encoded values are overwritten by "==" they don't appear.
>Fix:
Sure :) I already have a complete fix for the uuencode and the uudecode.

Unfortunately i can't test the EBCDIC code of the decode, but i simply use the
same idea as in the orignal code for the character conversion.

The basic problem is that the base64 encoding only works easily for blocks of
3 bytes to encode or in blocks of 4 bytes to decode. This has already sort of
been done in the original code but with some errors in the extrem case
situations (like encoding an empty string).

The solution is to introduce a small temporary character array which stores
always the next 3 resp. 4 characters of the string to encode resp. decode and
fills them up with 0 if the end of string was already reached. This will always
give a predicatable behaviour without any wrong memory accesses anymore.

Here follow the two fixed functions of the main/util.c:

--------------------- cut here -----------------------------------------------
API_EXPORT(char *) ap_uudecode(pool *p, const char *bufcoded)
{           
    int nbytesdecoded, i;
    unsigned char temp[4];
    register const unsigned char *bufin;
    register char *bufplain;
    register unsigned char *bufout;
    register int nprbytes;

    /* Strip leading whitespace. */
        
    while (*bufcoded == ' ' || *bufcoded == '\t')
        bufcoded++;
    
    /* Figure out how many characters are in the input buffer.
     * Allocate this many from the per-transaction pool for the result.
     */
#ifndef CHARSET_EBCDIC
    bufin = (const unsigned char *) bufcoded;
    while (pr2six[*(bufin++)] <= 63);
    nprbytes = (bufin - (const unsigned char *) bufcoded) - 1;
    nbytesdecoded = ((nprbytes + 3) / 4) * 3;
    
    bufplain = ap_palloc(p, nbytesdecoded + 1);
    bufout = (unsigned char *) bufplain;
    
    bufin = (const unsigned char *) bufcoded;
    
    while (nprbytes > 0) { 
        for(i=0; i<4 && pr2six[bufin[i]] != '='; i++)
            temp[i] = pr2six[bufin[i]];

        while(i<4)
            temp[i++] = 0;  

        *(bufout++) =
            (unsigned char) (temp[0] << 2 | temp[1] >> 4);
        *(bufout++) =
            (unsigned char) (temp[1] << 4 | temp[2] >> 2);
        *(bufout++) =
            (unsigned char) (temp[2] << 6 | temp[3]);
    
        bufin += 4;
        nprbytes -= 4;
    }
    
    bufplain[nbytesdecoded] = '\0';
#else /*CHARSET_EBCDIC*/
    bufin = (const unsigned char *) bufcoded;
    while (pr2six[os_toascii[(unsigned char)*(bufin++)]] <= 63);
    nprbytes = (bufin - (const unsigned char *) bufcoded) - 1;
    nbytesdecoded = ((nprbytes + 3) / 4) * 3;
    
    bufplain = ap_palloc(p, nbytesdecoded + 1);
    bufout = (unsigned char *) bufplain;
    
    bufin = (const unsigned char *) bufcoded;
 
    while (nprbytes > 0) {            
        for(i=0; i<4 && os_toascii[(unsigned char)pr2six[bufin[i]]] != '='; i++)
            temp[i] = os_toascii[(unsigned char)pr2six[bufin[i]]];      

        while(i<4)
            temp[i++] = 0;

        *(bufout++) = os_toebcdic[    
            (unsigned char) (temp[0] << 2 | temp[1] >> 4)];
        *(bufout++) = os_toebcdic[    
            (unsigned char) (temp[1] << 4 | temp[2] >> 2)];
        *(bufout++) = os_toebcdic[    
            (unsigned char) (temp[2] << 6 | temp[3])];

        bufin += 4;
        nprbytes -= 4;
    }

    bufplain[nbytesdecoded] = '\0';   
#endif /*CHARSET_EBCDIC*/
    return bufplain;
}

static const char basis_64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";     

API_EXPORT(char *) ap_uuencode(pool *a, char *string)
{
    int i,j, len = strlen(string);    
    char *p;
    unsigned char temp[3];

    char *encoded = (char *) ap_pcalloc(a, 3 + (len+2) / 3 * 4);

    p = encoded;
    for (i = 0; i < len; i += 3) {    
        for(j=0; j<3; j++) {
            if((i+j)<len)
                temp[j] = string[i+j];
            else
                temp[j] = 0;
        }

        *p++ = basis_64[temp[0] >> 2];
        *p++ = basis_64[((temp[0] & 0x3) << 4) | ((int) (temp[1] & 0xF0) >> 4)];
        *p++ = basis_64[((temp[1] & 0xF) << 2) | ((int) (temp[2] & 0xC0) >> 6)];
        *p++ = basis_64[temp[2] & 0x3F];
    }
    *p++ = '=';
    *p++ = '=';
    *p++ = '\0';

    return encoded;
}

------------------------------------------ cut here --------------------------
>Audit-Trail:
>Unformatted:
[In order for any reply to be added to the PR database, ]
[you need to include <apbugs@Apache.Org> in the Cc line ]
[and leave the subject line UNCHANGED.  This is not done]
[automatically because of the potential for mail loops. ]
[If you do not include this Cc, your reply may be ig-   ]
[nored unless you are responding to an explicit request ]
[from a developer.                                      ]
[Reply only with text; DO NOT SEND ATTACHMENTS!         ]