Return-Path: Delivered-To: apmail-apr-dev-archive@www.apache.org Received: (qmail 56383 invoked from network); 19 Sep 2003 20:02:59 -0000 Received: from daedalus.apache.org (HELO mail.apache.org) (208.185.179.12) by minotaur-2.apache.org with SMTP; 19 Sep 2003 20:02:59 -0000 Received: (qmail 34375 invoked by uid 500); 19 Sep 2003 20:02:47 -0000 Delivered-To: apmail-apr-dev-archive@apr.apache.org Received: (qmail 34253 invoked by uid 500); 19 Sep 2003 20:02:46 -0000 Mailing-List: contact dev-help@apr.apache.org; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: Delivered-To: mailing list dev@apr.apache.org Received: (qmail 34237 invoked from network); 19 Sep 2003 20:02:46 -0000 Errors-To: Message-Id: <5.2.0.9.2.20030919144601.026ef008@pop3.rowe-clan.net> X-Sender: wrowe%rowe-clan.net@pop3.rowe-clan.net X-Mailer: QUALCOMM Windows Eudora Version 5.2.0.9 Date: Fri, 19 Sep 2003 14:56:03 -0500 To: dev@apr.apache.org From: "William A. Rowe, Jr." Subject: apr-util/dbm/sdbm page sizes Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=====================_183449000==_" X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: minotaur-2.apache.org 1.6.2 0/1000/N --=====================_183449000==_ Content-Type: text/plain; charset="us-ascii" As a few here already know, we used the 'Standard' page size for our apr-util SDBM implementation. This assures that you can modify our sdbm files from perl and other tools. However, it's pretty clear that 1024 just doesn't cut it when it comes to huge objects from ssl caching and other sorts of large blobs. The modssl project (1.3) had tweaked sdbm to take larger page sizes, but this leads to an inflexible implementation (another fixed page size, but incompatible with other implementations.) 
I'd like to see apr-util support alternate-sized dbm pages, but without making fixed assumptions about those sizes (other than this - the structure uses shorts for offsets, so the page size does have a cap). The attached patch allows the SDBM to use arbitrary data and directory page sizes. However, I've not hacked in any method of declaring what those sizes should be (they are the 'standard' defaults at the moment). What do folks believe is the most rational approach to expanding support to create an sdbm with alternate page sizes? And how best can we embed that info into the sdbm so that later accesses to the file use the correct page size? My current thought is to embed a NULL-key record at the head of the file, with a specific known value element containing some identifier string such as "APR-SDBM" followed by two ints, the directory and data page sizes. If that record is missing (or created with default sizes), the file is treated as a standard sdbm with 1024/4096-sized pages. Other thoughts, suggestions or observations? Bill --=====================_183449000==_ Content-Type: text/plain; charset="us-ascii" Content-Disposition: attachment; filename="sdbm_var.patch" ? 
foo Index: dbm/sdbm/sdbm.c =================================================================== RCS file: /home/cvs/apr-util/dbm/sdbm/sdbm.c,v retrieving revision 1.26 diff -u -r1.26 sdbm.c --- dbm/sdbm/sdbm.c 1 Jan 2003 00:02:19 -0000 1.26 +++ dbm/sdbm/sdbm.c 19 Sep 2003 19:47:16 -0000 @@ -71,7 +71,6 @@ #include "sdbm_private.h" #include /* for memset() */ -#include /* for malloc() and free() */ /* * forward @@ -92,8 +91,8 @@ #define sdbm_dirfno(db) ((db)->dirf) #define sdbm_pagfno(db) ((db)->pagf) -#define OFF_PAG(off) (apr_off_t) (off) * PBLKSIZ -#define OFF_DIR(off) (apr_off_t) (off) * DBLKSIZ +#define OFF_PAG(db,off) ((apr_off_t) (off) * (db)->pblksiz) +#define OFF_DIR(db,off) ((apr_off_t) (off) * (db)->dblksiz) static long masks[] = { 000000000000, 000000000001, 000000000003, 000000000007, @@ -120,7 +119,6 @@ (void) apr_file_unlock(db->dirf); (void) apr_file_close(db->dirf); (void) apr_file_close(db->pagf); - free(db); return APR_SUCCESS; } @@ -133,11 +131,27 @@ *pdb = NULL; - db = malloc(sizeof(*db)); - memset(db, 0, sizeof(*db)); - + /* Allocate a single buffer for the structure and NULL it + */ + db = apr_palloc(p, sizeof(apr_sdbm_t)); + memset(db, 0, sizeof(apr_sdbm_t)); db->pool = p; + /* Determine the dblksiz/pblksiz (unimplemented), + * then allocate and null the buffers. + * Because we ultimately need to read these after the db is opened, + * they can't be allocated at the time we allocate db. + * The default sizes below indicate the "standard" sdbm parameters + * that are respected by perl and most sdbm implementations. + */ + db->dblksiz = 4096; + db->pblksiz = 1024; + + db->pagbuf = apr_palloc(db->pool, db->pblksiz * 2 + db->dblksiz); + memset(db->pagbuf, 0, db->pblksiz + db->dblksiz); + db->pagtmp = db->pagbuf + db->pblksiz; + db->dirbuf = db->pagtmp + db->pblksiz; + /* * adjust user flags so that WRONLY becomes RDWR, * as required by this package. 
Also set our internal @@ -205,7 +219,6 @@ if (db->pagf != NULL) { (void) apr_file_close(db->pagf); } - free(db); return status; } @@ -248,10 +261,10 @@ static apr_status_t write_page(apr_sdbm_t *db, const char *buf, long pagno) { apr_status_t status; - apr_off_t off = OFF_PAG(pagno); + apr_off_t off = OFF_PAG(db, pagno); if ((status = apr_file_seek(db->pagf, APR_SET, &off)) == APR_SUCCESS) - status = apr_file_write_full(db->pagf, buf, PBLKSIZ, NULL); + status = apr_file_write_full(db->pagf, buf, db->pblksiz, NULL); return status; } @@ -295,9 +308,11 @@ return APR_EINVAL; need = key.dsize + val.dsize; /* - * is the pair too big (or too small) for this database ?? + * Is the pair too big (or too small) for this database ?? + * This is derived from the original MAXPAIR calculation. + * At least MS compiler screamed that sizeof left the RHS unsigned. */ - if (need < 0 || need > PAIRMAX) + if ((need < 0) || (need > db->pblksiz - (int)sizeof(short) * 8)) return APR_EINVAL; if ((status = apr_sdbm_lock(db, APR_FLOCK_EXCLUSIVE)) != APR_SUCCESS) @@ -344,7 +359,7 @@ static apr_status_t makroom(apr_sdbm_t *db, long hash, int need) { long newp; - char twin[PBLKSIZ]; + char *twin = db->pagtmp; char *pag = db->pagbuf; char *new = twin; register int smax = SPLTMAX; @@ -374,7 +389,7 @@ return status; db->pagbno = newp; - (void) memcpy(pag, new, PBLKSIZ); + (void) memcpy(pag, new, db->pblksiz); } else { if ((status = write_page(db, new, newp)) != APR_SUCCESS) @@ -452,7 +467,7 @@ /* * start at page 0 */ - if ((status = read_from(db->pagf, db->pagbuf, OFF_PAG(0), PBLKSIZ)) + if ((status = read_from(db->pagf, db->pagbuf, OFF_PAG(db, 0), db->pblksiz)) == APR_SUCCESS) { db->pagbno = 0; db->blkptr = 0; @@ -512,7 +527,7 @@ * ### joe: this assumption was surely never correct? but * ### we make it so in read_from anyway. 
*/ - if ((status = read_from(db->pagf, db->pagbuf, OFF_PAG(pagb), PBLKSIZ)) + if ((status = read_from(db->pagf, db->pagbuf, OFF_PAG(db, pagb), db->pblksiz)) != APR_SUCCESS) return status; @@ -531,10 +546,10 @@ register long dirb; c = dbit / BYTESIZ; - dirb = c / DBLKSIZ; + dirb = c / db->dblksiz; if (dirb != db->dirbno) { - if (read_from(db->dirf, db->dirbuf, OFF_DIR(dirb), DBLKSIZ) + if (read_from(db->dirf, db->dirbuf, OFF_DIR(db, dirb), db->dblksiz) != APR_SUCCESS) return 0; @@ -543,7 +558,7 @@ debug(("dir read: %d\n", dirb)); } - return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); + return db->dirbuf[c % db->dblksiz] & (1 << dbit % BYTESIZ); } static apr_status_t setdbit(apr_sdbm_t *db, long dbit) @@ -554,10 +569,10 @@ apr_off_t off; c = dbit / BYTESIZ; - dirb = c / DBLKSIZ; + dirb = c / db->dblksiz; if (dirb != db->dirbno) { - if ((status = read_from(db->dirf, db->dirbuf, OFF_DIR(dirb), DBLKSIZ)) + if ((status = read_from(db->dirf, db->dirbuf, OFF_DIR(db, dirb), db->dblksiz)) != APR_SUCCESS) return status; @@ -566,14 +581,14 @@ debug(("dir read: %d\n", dirb)); } - db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); + db->dirbuf[c % db->dblksiz] |= (1 << dbit % BYTESIZ); if (dbit >= db->maxbno) - db->maxbno += DBLKSIZ * BYTESIZ; + db->maxbno += db->dblksiz * BYTESIZ; - off = OFF_DIR(dirb); + off = OFF_DIR(db, dirb); if ((status = apr_file_seek(db->dirf, APR_SET, &off)) == APR_SUCCESS) - status = apr_file_write_full(db->dirf, db->dirbuf, DBLKSIZ, NULL); + status = apr_file_write_full(db->dirf, db->dirbuf, db->dblksiz, NULL); return status; } @@ -597,7 +612,7 @@ */ db->keyptr = 0; if (db->pagbno != db->blkptr++) { - apr_off_t off = OFF_PAG(db->blkptr); + apr_off_t off = OFF_PAG(db, db->blkptr); if ((status = apr_file_seek(db->pagf, APR_SET, &off) != APR_SUCCESS)) return status; @@ -605,7 +620,7 @@ db->pagbno = db->blkptr; /* ### EOF acceptable here too? 
*/ - if ((status = apr_file_read_full(db->pagf, db->pagbuf, PBLKSIZ, NULL)) + if ((status = apr_file_read_full(db->pagf, db->pagbuf, db->pblksiz, NULL)) != APR_SUCCESS) return status; if (!chkpage(db->pagbuf)) @@ -623,4 +638,3 @@ */ return (db->flags & SDBM_RDONLY) != 0; } - Index: dbm/sdbm/sdbm_pair.c =================================================================== RCS file: /home/cvs/apr-util/dbm/sdbm/sdbm_pair.c,v retrieving revision 1.6 diff -u -r1.6 sdbm_pair.c --- dbm/sdbm/sdbm_pair.c 1 Jan 2003 00:02:19 -0000 1.6 +++ dbm/sdbm/sdbm_pair.c 19 Sep 2003 19:47:16 -0000 @@ -102,12 +102,12 @@ char *pag; int need; { - register int n; - register int off; - register int avail; register short *ino = (short *) pag; + register int n = ino[0]; + register int avail; + register int off; - off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ; + off = (n > 0) ? ino[n] : PBLKSIZ; avail = off - (n + 1) * sizeof(short); need += 2 * sizeof(short); @@ -122,11 +122,11 @@ apr_sdbm_datum_t key; apr_sdbm_datum_t val; { - register int n; - register int off; register short *ino = (short *) pag; + register int n = ino[0]; + register int off; - off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ; + off = (n > 0) ? 
ino[n] : PBLKSIZ; /* * enter the key first */ @@ -150,12 +150,12 @@ char *pag; apr_sdbm_datum_t key; { - register int i; - register int n; apr_sdbm_datum_t val; register short *ino = (short *) pag; + register int n = ino[0]; + register int i; - if ((n = ino[0]) == 0) + if (n == 0) return sdbm_nullitem; if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) @@ -180,9 +180,9 @@ char *pag; int num; { + register short *ino = (short *) pag; apr_sdbm_datum_t key; register int off; - register short *ino = (short *) pag; num = num * 2 - 1; if (ino[0] == 0 || num > ino[0]) @@ -201,11 +201,11 @@ char *pag; apr_sdbm_datum_t key; { - register int n; - register int i; register short *ino = (short *) pag; + register int n = ino[0]; + register int i; - if ((n = ino[0]) == 0) + if (n == 0) return 0; if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) @@ -336,11 +336,11 @@ chkpage(pag) char *pag; { - register int n; - register int off; register short *ino = (short *) pag; + register int n = ino[0]; + register int off; - if ((n = ino[0]) < 0 || n > PBLKSIZ / sizeof(short)) + if (n < 0 || n > PBLKSIZ / sizeof(short)) return 0; if (n > 0) { Index: dbm/sdbm/sdbm_private.h =================================================================== RCS file: /home/cvs/apr-util/dbm/sdbm/sdbm_private.h,v retrieving revision 1.10 diff -u -r1.10 sdbm_private.h --- dbm/sdbm/sdbm_private.h 1 Jan 2003 00:02:19 -0000 1.10 +++ dbm/sdbm/sdbm_private.h 19 Sep 2003 19:47:16 -0000 @@ -66,17 +66,7 @@ #include "apr_file_io.h" #include "apr_errno.h" /* for apr_status_t */ -#if 0 -/* if the block/page size is increased, it breaks perl apr_sdbm_t compatibility */ -#define DBLKSIZ 16384 -#define PBLKSIZ 8192 -#define PAIRMAX 8008 /* arbitrary on PBLKSIZ-N */ -#else -#define DBLKSIZ 4096 -#define PBLKSIZ 1024 -#define PAIRMAX 1008 /* arbitrary on PBLKSIZ-N */ -#endif -#define SPLTMAX 10 /* maximum allowed splits */ +#define SPLTMAX 10 /* maximum allowed splits */ /* for apr_sdbm_t.flags */ #define SDBM_RDONLY 0x1 /* 
data base open read-only */ @@ -96,9 +86,12 @@ int keyptr; /* current key for nextkey */ long blkno; /* current page to read/write */ long pagbno; /* current page in pagbuf */ - char pagbuf[PBLKSIZ]; /* page file block buffer */ + int pblksiz; /* sdbm's data blocksize (d:1024) */ + char *pagbuf; /* page file block buffer */ + char *pagtmp; /* page file temp workspace */ long dirbno; /* current block in dirbuf */ - char dirbuf[DBLKSIZ]; /* directory file block buffer */ + int dblksiz; /* sdbm's dir blocksize (d:4096) */ + char *dirbuf; /* directory file block buffer */ int lckcnt; /* number of calls to sdbm_lock */ }; --=====================_183449000==_ Content-Type: text/plain; charset="us-ascii" --=====================_183449000==_--