Message-Id: <199702070440.WAA24662@sierra.zyzzyva.com>
To: new-httpd@hyperreal.com
Subject: Re: [PATCH] chunked-encoding performance improvement 
In-reply-to: dgaudet's message of Sun, 02 Feb 1997 11:54:30 -0800.
         <Pine.LNX.3.95dg2.970202114040.7628B-100000@twinlark.arctic.org>
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Date: Thu, 06 Feb 1997 22:40:05 -0600
From: Randy Terbush <randy@zyzzyva.com>
Sender: new-httpd-owner@apache.org
Precedence: bulk
Reply-To: new-httpd@hyperreal.com

+1

I worked through the SSL issues and this seems to work fine.

> Here it is.  This patch eliminates all "wasteful" bflush() calls and
> attempts to be as efficient with write()s as possible without making the
> code too ugly. 
> 
> Essentially it makes the chunk header and trailer a part of the output
> buffer, and maintains enough state in the BUFF to be able to fix up the
> header when it is ready to flush the chunk.  It uses writev() when it's
> not efficient for us to buffer a large write -- i.e. a bwrite() of a large
> block will first fill the buffer, and chunk that, then if the remaining
> part is larger than bufsiz it will be chunked without first copying it to
> a buffer. 
> 
> I added NO_WRITEV which should be defined on platforms that do not have
> writev().  They will experience a performance penalty on those large
> writes, but that's just too bad.  A keen porter could implement a writev() 
> that did userland copying to assemble a single write().  For now,
> NO_WRITEV is not defined anywhere. 
> 
> I have tested this on various types of requests, and poured over output
> dumps to ensure it does the right thing.  Chunked encoding is forced when
> the response contains no Content-Length, so testing with CGIs and SSIs is
> a good approach.  At one point I lowered the DEFAULT_BUFSIZE in buff.c in
> order to be sure the large write support was working.
> 
> This may look like a large change this late in the cycle, so I should
> point out that without it the present code spits a packet for the response
> headers, and three packets for each chunk.  This makes persistent
> connections far more expensive than they should be.
> 
> Dean
> 
> Index: buff.c
> ===================================================================
> RCS file: /export/home/cvs/apache/src/buff.c,v
> retrieving revision 1.16
> diff -c -3 -r1.16 buff.c
> *** buff.c	1997/01/30 02:42:57	1.16
> --- buff.c	1997/02/02 19:32:45
> ***************
> *** 57,62 ****
> --- 57,65 ----
>   #ifndef NO_UNISTD_H
>   #include <unistd.h>
>   #endif
> + #ifndef NO_WRITEV
> + #include <sys/uio.h>
> + #endif
>   
>   #include "conf.h"
>   #include "alloc.h"
> ***************
> *** 115,127 ****
>       if (flags & B_RD) fb->inbase = palloc(p, fb->bufsiz);
>       else fb->inbase = NULL;
>   
> !     if (flags & B_WR) fb->outbase = palloc(p, fb->bufsiz);
>       else fb->outbase = NULL;
>   
>       fb->inptr = fb->inbase;
>   
>       fb->incnt = 0;
>       fb->outcnt = 0;
>       fb->error = NULL;
>       fb->bytes_sent = 0L;
>   
> --- 118,133 ----
>       if (flags & B_RD) fb->inbase = palloc(p, fb->bufsiz);
>       else fb->inbase = NULL;
>   
> !     /* overallocate so that we can put a chunk trailer of CRLF into this
> !      * buffer */
> !     if (flags & B_WR) fb->outbase = palloc(p, fb->bufsiz + 2);
>       else fb->outbase = NULL;
>   
>       fb->inptr = fb->inbase;
>   
>       fb->incnt = 0;
>       fb->outcnt = 0;
> +     fb->outchunk = -1;
>       fb->error = NULL;
>       fb->bytes_sent = 0L;
>   
> ***************
> *** 175,196 ****
>   }
>   
>   /*
>    * Set a flag on (1) or off (0).
>    */
>   int bsetflag(BUFF *fb, int flag, int value)
>   {
> !     if( flag & B_CHUNK ) {
> ! 	/*
> ! 	 * This shouldn't be true for much longer -djg
> ! 	 * Currently, these flags work
> ! 	 * as a function of the bcwrite() function, so we make sure to
> ! 	 * flush before setting them one way or the other; otherwise
> ! 	 * writes could end up with the wrong flag.
> ! 	 */
> ! 	bflush(fb);
>       }
> -     if (value) fb->flags |= flag;
> -     else fb->flags &= ~flag;
>       return value;
>   }
>   
> --- 181,285 ----
>   }
>   
>   /*
> +  * start chunked encoding
> +  */
> + static void
> + start_chunk( BUFF *fb )
> + {
> +     char chunksize[16];	/* Big enough for practically anything */
> +     int chunk_header_size;
> + 
> +     if( fb->outchunk != -1 ) {
> + 	/* already chunking */
> + 	return;
> +     }
> +     if( !(fb->flags & B_WR) ) {
> + 	/* unbuffered writes */
> + 	return;
> +     }
> + 
> +     /* we know that the chunk header is going to take at least 3 bytes... */
> +     chunk_header_size = ap_snprintf( chunksize, sizeof(chunksize),
> + 	"%x\015\012", fb->bufsiz - fb->outcnt - 3 );
> +     /* we need at least the header_len + at least 1 data byte
> +      * remember that we've overallocated fb->outbase so that we can always
> +      * fit the two byte CRLF trailer
> +      */
> +     if( fb->bufsiz - fb->outcnt < chunk_header_size + 1 ) {
> + 	bflush(fb);
> +     }
> +     /* assume there's enough space now */
> +     memcpy( &fb->outbase[fb->outcnt], chunksize, chunk_header_size );
> +     fb->outchunk = fb->outcnt;
> +     fb->outcnt += chunk_header_size;
> +     fb->outchunk_header_size = chunk_header_size;
> + }
> + 
> + 
> + /*
> +  * end a chunk -- tweak the chunk_header from start_chunk, and add a trailer
> +  */
> + static void
> + end_chunk( BUFF *fb )
> + {
> +     int i;
> + 
> +     if( fb->outchunk == -1 ) {
> + 	/* not chunking */
> + 	return;
> +     }
> + 
> +     if( fb->outchunk + fb->outchunk_header_size == fb->outcnt ) {
> + 	/* nothing was written into this chunk, and we can't write a 0 size
> + 	 * chunk because that signifies EOF, so just erase it
> + 	 */
> + 	fb->outcnt = fb->outchunk;
> + 	fb->outchunk = -1;
> + 	return;
> +     }
> + 
> +     /* we know this will fit because of how we wrote it in start_chunk() */
> +     i = ap_snprintf( &fb->outbase[fb->outchunk], fb->outchunk_header_size,
> + 	"%x", fb->outcnt - fb->outchunk - fb->outchunk_header_size );
> + 
> +     /* we may have to tack some trailing spaces onto the number we just wrote
> +      * in case it was smaller than our estimated size.  We've also written
> +      * a \0 into the buffer with ap_snprintf so we might have to put a
> +      * \r back in.
> +      */
> +     i += fb->outchunk;
> +     while( fb->outbase[i] != '\015' && fb->outbase[i] != '\012' ) {
> + 	fb->outbase[i++] = ' ';
> +     }
> +     if( fb->outbase[i] == '\012' ) {
> + 	/* we overwrote the \r, so put it back */
> + 	fb->outbase[i-1] = '\015';
> +     }
> + 
> +     /* tack on the trailing CRLF, we've reserved room for this */
> +     fb->outbase[fb->outcnt++] = '\015';
> +     fb->outbase[fb->outcnt++] = '\012';
> + 
> +     fb->outchunk = -1;
> + }
> + 
> + 
> + /*
>    * Set a flag on (1) or off (0).
>    */
>   int bsetflag(BUFF *fb, int flag, int value)
>   {
> !     if (value) {
> ! 	fb->flags |= flag;
> ! 	if( flag & B_CHUNK ) {
> ! 	    start_chunk(fb);
> ! 	}
> !     } else {
> ! 	fb->flags &= ~flag;
> ! 	if( flag & B_CHUNK ) {
> ! 	    end_chunk(fb);
> ! 	}
>       }
>       return value;
>   }
>   
> ***************
> *** 542,571 ****
>    * an interim solution pending a complete rewrite of all this stuff in
>    * 2.0, using something like sfio stacked disciplines or BSD's funopen().
>    */
> ! int bcwrite(BUFF *fb, const void *buf, int nbyte) {
>   
> !     if (fb->flags & B_CHUNK) {
> ! 	char chunksize[16];	/* Big enough for practically anything */
>   
> ! 	ap_snprintf(chunksize, sizeof(chunksize), "%x\015\012", nbyte);
> ! 	if( write_it_all(fb->fd, chunksize, strlen(chunksize)) == -1 ) {
> ! 	    return( -1 );
> ! 	}
> ! 	if( write_it_all(fb->fd, buf, nbyte) == -1 ) {
>   	    return( -1 );
>   	}
> ! 	if( write_it_all(fb->fd, "\015\012", 2) == -1 ) {
> ! 	    return( -1 );
>   	}
> - 	return( nbyte );
>       }
> !     return( write(fb->fd, buf, nbyte) );
>   }
>   
>   /*
>    * Write nbyte bytes.
>    * Only returns fewer than nbyte if an error ocurred.
>    * Returns -1 if no bytes were written before the error ocurred.
>    */
>   int
>   bwrite(BUFF *fb, const void *buf, int nbyte)
> --- 631,708 ----
>    * an interim solution pending a complete rewrite of all this stuff in
>    * 2.0, using something like sfio stacked disciplines or BSD's funopen().
>    */
> ! static int
> ! bcwrite(BUFF *fb, const void *buf, int nbyte) {
>   
> !     char chunksize[16];	/* Big enough for practically anything */
> ! #ifndef NO_WRITEV
> !     struct iovec vec[3];
> !     int i, rv;
> ! #endif
>   
> !     if( !(fb->flags & B_CHUNK) ) return write( fb->fd, buf, nbyte );
> ! 
> ! #ifdef NO_WRITEV
> !     /* without writev() this has poor performance, too bad */
> ! 
> !     ap_snprintf(chunksize, sizeof(chunksize), "%x\015\012", nbyte);
> !     if( write_it_all(fb->fd, chunksize, strlen(chunksize)) == -1 ) {
> ! 	return( -1 );
> !     }
> !     if( write_it_all(fb->fd, buf, nbyte) == -1 ) {
> ! 	return( -1 );
> !     }
> !     if( write_it_all(fb->fd, "\015\012", 2) == -1 ) {
> ! 	return( -1 );
> !     }
> !     return( nbyte );
> ! #else
> ! #define NVEC	(sizeof(vec)/sizeof(vec[0]))
> ! 
> !     vec[0].iov_base = chunksize;
> !     vec[0].iov_len = ap_snprintf(chunksize, sizeof(chunksize), "%x\015\012",
> ! 	nbyte);
> !     vec[1].iov_base = (void *)buf;	/* cast is to avoid const warning */
> !     vec[1].iov_len = nbyte;
> !     vec[2].iov_base = "\r\n";
> !     vec[2].iov_len = 2;
> !     /* while it's nice an easy to build the vector and crud, it's painful
> !      * to deal with a partial writev()
> !      */
> !     for( i = 0; i < NVEC; ) {
> ! 	do rv = writev( fb->fd, &vec[i], NVEC - i );
> ! 	while ( rv == -1 && errno == EINTR );
> ! 	if( rv == -1 ) {
>   	    return( -1 );
>   	}
> ! 	/* recalculate vec to deal with partial writes */
> ! 	while( rv > 0 ) {
> ! 	    if( rv <= vec[i].iov_len ) {
> ! 		vec[i].iov_base = (char *)vec[i].iov_base + rv;
> ! 		vec[i].iov_len -= rv;
> ! 		rv = 0;
> ! 		if( vec[i].iov_len == 0 ) {
> ! 		    ++i;
> ! 		}
> ! 	    } else {
> ! 		rv -= vec[i].iov_len;
> ! 		++i;
> ! 	    }
>   	}
>       }
> !     /* if we got here, we wrote it all */
> !     return( nbyte );
> ! #undef NVEC
> ! #endif
>   }
>   
> + 
>   /*
>    * Write nbyte bytes.
>    * Only returns fewer than nbyte if an error ocurred.
>    * Returns -1 if no bytes were written before the error ocurred.
> +  * It is worth noting that if an error occurs, the buffer is in an unknown
> +  * state.
>    */
>   int
>   bwrite(BUFF *fb, const void *buf, int nbyte)
> ***************
> *** 577,583 ****
>   
>       if (!(fb->flags & B_WR))
>       {
> ! /* unbuffered write */
>   	do i = bcwrite(fb, buf, nbyte);
>   	while (i == -1 && errno == EINTR);
>   	if (i > 0) fb->bytes_sent += i;
> --- 714,721 ----
>   
>       if (!(fb->flags & B_WR))
>       {
> ! /* unbuffered write -- have to use bcwrite since we aren't taking care
> !  * of chunking any other way */
>   	do i = bcwrite(fb, buf, nbyte);
>   	while (i == -1 && errno == EINTR);
>   	if (i > 0) fb->bytes_sent += i;
> ***************
> *** 611,618 ****
>   	}
>   
>   /* the buffer must be full */
> ! 	do i = bcwrite(fb, fb->outbase, fb->bufsiz);
> ! 	while (i == -1 && errno == EINTR);
>   	if (i > 0) fb->bytes_sent += i;
>   	if (i == 0)
>   	{
> --- 749,765 ----
>   	}
>   
>   /* the buffer must be full */
> ! 	if( fb->flags & B_CHUNK ) {
> ! 	    end_chunk(fb);
> ! 	    /* it is just too painful to try to re-cram the buffer while
> ! 	     * chunking
> ! 	     */
> ! 	    i = write_it_all( fb->fd, fb->outbase, fb->outcnt )
> ! 		    ? -1 : fb->outcnt;
> ! 	} else {
> ! 	    do i = write(fb->fd, fb->outbase, fb->outcnt);
> ! 	    while (i == -1 && errno == EINTR);
> ! 	}
>   	if (i > 0) fb->bytes_sent += i;
>   	if (i == 0)
>   	{
> ***************
> *** 629,649 ****
>   	    else return nwr;
>   	}
>   
> ! /*
> !  * we should have written all the data, however if the fd was in a
> !  * strange (non-blocking) mode, then we might not have done so.
> !  */
> ! 	if (i < fb->bufsiz)
>   	{
> ! 	    int j, n=fb->bufsiz;
>   	    unsigned char *x=fb->outbase;
>   	    for (j=i; j < n; j++) x[j-i] = x[j];
> ! 	    fb->outcnt = fb->bufsiz - i;
>   	} else
>   	    fb->outcnt = 0;
>       }
>   /* we have emptied the file buffer. Now try to write the data from the
> !  * original buffer until there is less than bufsiz left
>    */
>       while (nbyte >= fb->bufsiz)
>       {
> --- 776,795 ----
>   	    else return nwr;
>   	}
>   
> ! 	/* deal with a partial write */
> ! 	if (i < fb->outcnt)
>   	{
> ! 	    int j, n=fb->outcnt;
>   	    unsigned char *x=fb->outbase;
>   	    for (j=i; j < n; j++) x[j-i] = x[j];
> ! 	    fb->outcnt -= i;
>   	} else
>   	    fb->outcnt = 0;
>       }
>   /* we have emptied the file buffer. Now try to write the data from the
> !  * original buffer until there is less than bufsiz left.  Note that we
> !  * use bcwrite() to do this for us, it will do the chunking so that
> !  * we don't have to dink around building a chunk in our own buffer.
>    */
>       while (nbyte >= fb->bufsiz)
>       {
> ***************
> *** 670,677 ****
>   	nbyte -= i;
>       }
>   /* copy what's left to the file buffer */
> !     if (nbyte > 0) memcpy(fb->outbase, buf, nbyte);
> !     fb->outcnt = nbyte;
>       nwr += nbyte;
>       return nwr;
>   }
> --- 816,825 ----
>   	nbyte -= i;
>       }
>   /* copy what's left to the file buffer */
> !     fb->outcnt = 0;
> !     if( fb->flags & B_CHUNK ) start_chunk( fb );
> !     if (nbyte > 0) memcpy(fb->outbase + fb->outcnt, buf, nbyte);
> !     fb->outcnt += nbyte;
>       nwr += nbyte;
>       return nwr;
>   }
> ***************
> *** 689,698 ****
>   
>       if (fb->flags & B_WRERR) return -1;
>       
>       while (fb->outcnt > 0)
>       {
>   /* the buffer must be full */
> ! 	do i = bcwrite(fb, fb->outbase, fb->outcnt);
>   	while (i == -1 && errno == EINTR);
>   	if (i > 0) fb->bytes_sent += i;
>   	if (i == 0)
> --- 837,848 ----
>   
>       if (fb->flags & B_WRERR) return -1;
>       
> +     if( fb->flags & B_CHUNK ) end_chunk(fb);
> + 
>       while (fb->outcnt > 0)
>       {
>   /* the buffer must be full */
> ! 	do i = write(fb->fd, fb->outbase, fb->outcnt);
>   	while (i == -1 && errno == EINTR);
>   	if (i > 0) fb->bytes_sent += i;
>   	if (i == 0)
> Index: buff.h
> ===================================================================
> RCS file: /export/home/cvs/apache/src/buff.h,v
> retrieving revision 1.11
> diff -c -3 -r1.11 buff.h
> *** buff.h	1997/01/30 02:42:57	1.11
> --- buff.h	1997/02/02 19:32:45
> ***************
> *** 79,84 ****
> --- 79,86 ----
>       unsigned char *inptr;  /* pointer to next location to read */
>       int incnt;             /* number of bytes left to read from input buffer;
>   			    * always 0 if had a read error  */
> +     int outchunk;	   /* location of chunk header when chunking */
> +     int outchunk_header_size; /* how long the header is */
>       int outcnt;            /* number of byte put in output buffer */
>       unsigned char *inbase;
>       unsigned char *outbase;