Received: by taz.hyperreal.com (8.8.4/V2.0) id UAA08681; Thu, 6 Feb 1997 20:39:53 -0800 (PST) Received: from sierra.zyzzyva.com by taz.hyperreal.com (8.8.4/V2.0) with ESMTP id UAA08651; Thu, 6 Feb 1997 20:39:35 -0800 (PST) Received: from sierra (localhost [127.0.0.1]) by sierra.zyzzyva.com (8.8.4/8.8.2) with ESMTP id WAA24662 for ; Thu, 6 Feb 1997 22:40:05 -0600 (CST) Message-Id: <199702070440.WAA24662@sierra.zyzzyva.com> To: new-httpd@hyperreal.com Subject: Re: [PATCH] chunked-encoding performance improvement In-reply-to: dgaudet's message of Sun, 02 Feb 1997 11:54:30 -0800. X-uri: http://www.zyzzyva.com/ Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Date: Thu, 06 Feb 1997 22:40:05 -0600 From: Randy Terbush Sender: new-httpd-owner@apache.org Precedence: bulk Reply-To: new-httpd@hyperreal.com +1 I worked through the SSL issues and this seems to work fine. > Here it is. This patch eliminates all "wasteful" bflush() calls and > attempts to be as efficient with write()s as possible without making the > code too ugly. > > Essentially it makes the chunk header and trailer a part of the output > buffer, and maintains enough state in the BUFF to be able to fix up the > header when it is ready to flush the chunk. It uses writev() when it's > not efficient for us to buffer a large write -- i.e. a bwrite() of a large > block will first fill the buffer, and chunk that, then if the remaining > part is larger than bufsiz it will be chunked without first copying it to > a buffer. > > I added NO_WRITEV which should be defined on platforms that do not have > writev(). They will experience a performance penalty on those large > writes, but that's just too bad. A keen porter could implement a writev() > that did userland copying to assemble a single write(). For now, > NO_WRITEV is not defined anywhere. > > I have tested this on various types of requests, and poured over output > dumps to ensure it does the right thing. Chunked encoding is forced when > the response contains no Content-Length, so testing with CGIs and SSIs is > a good approach. At one point I lowered the DEFAULT_BUFSIZE in buff.c in > order to be sure the large write support was working. > > This may look like a large change this late in the cycle, so I should > point out that without it the present code spits a packet for the response > headers, and three packets for each chunk. This makes persistent > connections far more expensive than they should be. > > Dean > > Index: buff.c > =================================================================== > RCS file: /export/home/cvs/apache/src/buff.c,v > retrieving revision 1.16 > diff -c -3 -r1.16 buff.c > *** buff.c 1997/01/30 02:42:57 1.16 > --- buff.c 1997/02/02 19:32:45 > *************** > *** 57,62 **** > --- 57,65 ---- > #ifndef NO_UNISTD_H > #include > #endif > + #ifndef NO_WRITEV > + #include > + #endif > > #include "conf.h" > #include "alloc.h" > *************** > *** 115,127 **** > if (flags & B_RD) fb->inbase = palloc(p, fb->bufsiz); > else fb->inbase = NULL; > > ! if (flags & B_WR) fb->outbase = palloc(p, fb->bufsiz); > else fb->outbase = NULL; > > fb->inptr = fb->inbase; > > fb->incnt = 0; > fb->outcnt = 0; > fb->error = NULL; > fb->bytes_sent = 0L; > > --- 118,133 ---- > if (flags & B_RD) fb->inbase = palloc(p, fb->bufsiz); > else fb->inbase = NULL; > > ! /* overallocate so that we can put a chunk trailer of CRLF into this > ! * buffer */ > ! if (flags & B_WR) fb->outbase = palloc(p, fb->bufsiz + 2); > else fb->outbase = NULL; > > fb->inptr = fb->inbase; > > fb->incnt = 0; > fb->outcnt = 0; > + fb->outchunk = -1; > fb->error = NULL; > fb->bytes_sent = 0L; > > *************** > *** 175,196 **** > } > > /* > * Set a flag on (1) or off (0). > */ > int bsetflag(BUFF *fb, int flag, int value) > { > ! if( flag & B_CHUNK ) { > ! /* > ! * This shouldn't be true for much longer -djg > ! * Currently, these flags work > ! * as a function of the bcwrite() function, so we make sure to > ! * flush before setting them one way or the other; otherwise > ! * writes could end up with the wrong flag. > ! */ > ! bflush(fb); > } > - if (value) fb->flags |= flag; > - else fb->flags &= ~flag; > return value; > } > > --- 181,285 ---- > } > > /* > + * start chunked encoding > + */ > + static void > + start_chunk( BUFF *fb ) > + { > + char chunksize[16]; /* Big enough for practically anything */ > + int chunk_header_size; > + > + if( fb->outchunk != -1 ) { > + /* already chunking */ > + return; > + } > + if( !(fb->flags & B_WR) ) { > + /* unbuffered writes */ > + return; > + } > + > + /* we know that the chunk header is going to take at least 3 bytes... */ > + chunk_header_size = ap_snprintf( chunksize, sizeof(chunksize), > + "%x\015\012", fb->bufsiz - fb->outcnt - 3 ); > + /* we need at least the header_len + at least 1 data byte > + * remember that we've overallocated fb->outbase so that we can always > + * fit the two byte CRLF trailer > + */ > + if( fb->bufsiz - fb->outcnt < chunk_header_size + 1 ) { > + bflush(fb); > + } > + /* assume there's enough space now */ > + memcpy( &fb->outbase[fb->outcnt], chunksize, chunk_header_size ); > + fb->outchunk = fb->outcnt; > + fb->outcnt += chunk_header_size; > + fb->outchunk_header_size = chunk_header_size; > + } > + > + > + /* > + * end a chunk -- tweak the chunk_header from start_chunk, and add a trailer > + */ > + static void > + end_chunk( BUFF *fb ) > + { > + int i; > + > + if( fb->outchunk == -1 ) { > + /* not chunking */ > + return; > + } > + > + if( fb->outchunk + fb->outchunk_header_size == fb->outcnt ) { > + /* nothing was written into this chunk, and we can't write a 0 size > + * chunk because that signifies EOF, so just erase it > + */ > + fb->outcnt = fb->outchunk; > + fb->outchunk = -1; > + return; > + } > + > + /* we know this will fit because of how we wrote it in start_chunk() */ > + i = ap_snprintf( &fb->outbase[fb->outchunk], fb->outchunk_header_size, > + "%x", fb->outcnt - fb->outchunk - fb->outchunk_header_size ); > + > + /* we may have to tack some trailing spaces onto the number we just wrote > + * in case it was smaller than our estimated size. We've also written > + * a \0 into the buffer with ap_snprintf so we might have to put a > + * \r back in. > + */ > + i += fb->outchunk; > + while( fb->outbase[i] != '\015' && fb->outbase[i] != '\012' ) { > + fb->outbase[i++] = ' '; > + } > + if( fb->outbase[i] == '\012' ) { > + /* we overwrote the \r, so put it back */ > + fb->outbase[i-1] = '\015'; > + } > + > + /* tack on the trailing CRLF, we've reserved room for this */ > + fb->outbase[fb->outcnt++] = '\015'; > + fb->outbase[fb->outcnt++] = '\012'; > + > + fb->outchunk = -1; > + } > + > + > + /* > * Set a flag on (1) or off (0). > */ > int bsetflag(BUFF *fb, int flag, int value) > { > ! if (value) { > ! fb->flags |= flag; > ! if( flag & B_CHUNK ) { > ! start_chunk(fb); > ! } > ! } else { > ! fb->flags &= ~flag; > ! if( flag & B_CHUNK ) { > ! end_chunk(fb); > ! } > } > return value; > } > > *************** > *** 542,571 **** > * an interim solution pending a complete rewrite of all this stuff in > * 2.0, using something like sfio stacked disciplines or BSD's funopen(). > */ > ! int bcwrite(BUFF *fb, const void *buf, int nbyte) { > > ! if (fb->flags & B_CHUNK) { > ! char chunksize[16]; /* Big enough for practically anything */ > > ! ap_snprintf(chunksize, sizeof(chunksize), "%x\015\012", nbyte); > ! if( write_it_all(fb->fd, chunksize, strlen(chunksize)) == -1 ) { > ! return( -1 ); > ! } > ! if( write_it_all(fb->fd, buf, nbyte) == -1 ) { > return( -1 ); > } > ! if( write_it_all(fb->fd, "\015\012", 2) == -1 ) { > ! return( -1 ); > } > - return( nbyte ); > } > ! return( write(fb->fd, buf, nbyte) ); > } > > /* > * Write nbyte bytes. > * Only returns fewer than nbyte if an error ocurred. > * Returns -1 if no bytes were written before the error ocurred. > */ > int > bwrite(BUFF *fb, const void *buf, int nbyte) > --- 631,708 ---- > * an interim solution pending a complete rewrite of all this stuff in > * 2.0, using something like sfio stacked disciplines or BSD's funopen(). > */ > ! static int > ! bcwrite(BUFF *fb, const void *buf, int nbyte) { > > ! char chunksize[16]; /* Big enough for practically anything */ > ! #ifndef NO_WRITEV > ! struct iovec vec[3]; > ! int i, rv; > ! #endif > > ! if( !(fb->flags & B_CHUNK) ) return write( fb->fd, buf, nbyte ); > ! > ! #ifdef NO_WRITEV > ! /* without writev() this has poor performance, too bad */ > ! > ! ap_snprintf(chunksize, sizeof(chunksize), "%x\015\012", nbyte); > ! if( write_it_all(fb->fd, chunksize, strlen(chunksize)) == -1 ) { > ! return( -1 ); > ! } > ! if( write_it_all(fb->fd, buf, nbyte) == -1 ) { > ! return( -1 ); > ! } > ! if( write_it_all(fb->fd, "\015\012", 2) == -1 ) { > ! return( -1 ); > ! } > ! return( nbyte ); > ! #else > ! #define NVEC (sizeof(vec)/sizeof(vec[0])) > ! > ! vec[0].iov_base = chunksize; > ! vec[0].iov_len = ap_snprintf(chunksize, sizeof(chunksize), "%x\015\012", > ! nbyte); > ! vec[1].iov_base = (void *)buf; /* cast is to avoid const warning */ > ! vec[1].iov_len = nbyte; > ! vec[2].iov_base = "\r\n"; > ! vec[2].iov_len = 2; > ! /* while it's nice an easy to build the vector and crud, it's painful > ! * to deal with a partial writev() > ! */ > ! for( i = 0; i < NVEC; ) { > ! do rv = writev( fb->fd, &vec[i], NVEC - i ); > ! while ( rv == -1 && errno == EINTR ); > ! if( rv == -1 ) { > return( -1 ); > } > ! /* recalculate vec to deal with partial writes */ > ! while( rv > 0 ) { > ! if( rv <= vec[i].iov_len ) { > ! vec[i].iov_base = (char *)vec[i].iov_base + rv; > ! vec[i].iov_len -= rv; > ! rv = 0; > ! if( vec[i].iov_len == 0 ) { > ! ++i; > ! } > ! } else { > ! rv -= vec[i].iov_len; > ! ++i; > ! } > } > } > ! /* if we got here, we wrote it all */ > ! return( nbyte ); > ! #undef NVEC > ! #endif > } > > + > /* > * Write nbyte bytes. > * Only returns fewer than nbyte if an error ocurred. > * Returns -1 if no bytes were written before the error ocurred. > + * It is worth noting that if an error occurs, the buffer is in an unknown > + * state. > */ > int > bwrite(BUFF *fb, const void *buf, int nbyte) > *************** > *** 577,583 **** > > if (!(fb->flags & B_WR)) > { > ! /* unbuffered write */ > do i = bcwrite(fb, buf, nbyte); > while (i == -1 && errno == EINTR); > if (i > 0) fb->bytes_sent += i; > --- 714,721 ---- > > if (!(fb->flags & B_WR)) > { > ! /* unbuffered write -- have to use bcwrite since we aren't taking care > ! * of chunking any other way */ > do i = bcwrite(fb, buf, nbyte); > while (i == -1 && errno == EINTR); > if (i > 0) fb->bytes_sent += i; > *************** > *** 611,618 **** > } > > /* the buffer must be full */ > ! do i = bcwrite(fb, fb->outbase, fb->bufsiz); > ! while (i == -1 && errno == EINTR); > if (i > 0) fb->bytes_sent += i; > if (i == 0) > { > --- 749,765 ---- > } > > /* the buffer must be full */ > ! if( fb->flags & B_CHUNK ) { > ! end_chunk(fb); > ! /* it is just too painful to try to re-cram the buffer while > ! * chunking > ! */ > ! i = write_it_all( fb->fd, fb->outbase, fb->outcnt ) > ! ? -1 : fb->outcnt; > ! } else { > ! do i = write(fb->fd, fb->outbase, fb->outcnt); > ! while (i == -1 && errno == EINTR); > ! } > if (i > 0) fb->bytes_sent += i; > if (i == 0) > { > *************** > *** 629,649 **** > else return nwr; > } > > ! /* > ! * we should have written all the data, however if the fd was in a > ! * strange (non-blocking) mode, then we might not have done so. > ! */ > ! if (i < fb->bufsiz) > { > ! int j, n=fb->bufsiz; > unsigned char *x=fb->outbase; > for (j=i; j < n; j++) x[j-i] = x[j]; > ! fb->outcnt = fb->bufsiz - i; > } else > fb->outcnt = 0; > } > /* we have emptied the file buffer. Now try to write the data from the > ! * original buffer until there is less than bufsiz left > */ > while (nbyte >= fb->bufsiz) > { > --- 776,795 ---- > else return nwr; > } > > ! /* deal with a partial write */ > ! if (i < fb->outcnt) > { > ! int j, n=fb->outcnt; > unsigned char *x=fb->outbase; > for (j=i; j < n; j++) x[j-i] = x[j]; > ! fb->outcnt -= i; > } else > fb->outcnt = 0; > } > /* we have emptied the file buffer. Now try to write the data from the > ! * original buffer until there is less than bufsiz left. Note that we > ! * use bcwrite() to do this for us, it will do the chunking so that > ! * we don't have to dink around building a chunk in our own buffer. > */ > while (nbyte >= fb->bufsiz) > { > *************** > *** 670,677 **** > nbyte -= i; > } > /* copy what's left to the file buffer */ > ! if (nbyte > 0) memcpy(fb->outbase, buf, nbyte); > ! fb->outcnt = nbyte; > nwr += nbyte; > return nwr; > } > --- 816,825 ---- > nbyte -= i; > } > /* copy what's left to the file buffer */ > ! fb->outcnt = 0; > ! if( fb->flags & B_CHUNK ) start_chunk( fb ); > ! if (nbyte > 0) memcpy(fb->outbase + fb->outcnt, buf, nbyte); > ! fb->outcnt += nbyte; > nwr += nbyte; > return nwr; > } > *************** > *** 689,698 **** > > if (fb->flags & B_WRERR) return -1; > > while (fb->outcnt > 0) > { > /* the buffer must be full */ > ! do i = bcwrite(fb, fb->outbase, fb->outcnt); > while (i == -1 && errno == EINTR); > if (i > 0) fb->bytes_sent += i; > if (i == 0) > --- 837,848 ---- > > if (fb->flags & B_WRERR) return -1; > > + if( fb->flags & B_CHUNK ) end_chunk(fb); > + > while (fb->outcnt > 0) > { > /* the buffer must be full */ > ! do i = write(fb->fd, fb->outbase, fb->outcnt); > while (i == -1 && errno == EINTR); > if (i > 0) fb->bytes_sent += i; > if (i == 0) > Index: buff.h > =================================================================== > RCS file: /export/home/cvs/apache/src/buff.h,v > retrieving revision 1.11 > diff -c -3 -r1.11 buff.h > *** buff.h 1997/01/30 02:42:57 1.11 > --- buff.h 1997/02/02 19:32:45 > *************** > *** 79,84 **** > --- 79,86 ---- > unsigned char *inptr; /* pointer to next location to read */ > int incnt; /* number of bytes left to read from input buffer; > * always 0 if had a read error */ > + int outchunk; /* location of chunk header when chunking */ > + int outchunk_header_size; /* how long the header is */ > int outcnt; /* number of byte put in output buffer */ > unsigned char *inbase; > unsigned char *outbase;