Return-Path: Mailing-List: contact embperl-cvs-help@perl.apache.org; run by ezmlm Delivered-To: mailing list embperl-cvs@perl.apache.org Received: (qmail 43209 invoked by uid 500); 8 May 2000 18:16:30 -0000 Delivered-To: apmail-embperl-cvs@apache.org Received: (qmail 43206 invoked by uid 1071); 8 May 2000 18:16:29 -0000 Date: 8 May 2000 18:16:29 -0000 Message-ID: <20000508181629.43205.qmail@locus.apache.org> From: richter@locus.apache.org To: embperl-cvs@apache.org Subject: cvs commit: embperl/test/html input.htm richter 00/05/08 11:16:29 Modified: . Tag: Embperl2 embperl.h epdom.c epparse.c test/html Tag: Embperl2 input.htm Added: . Tag: Embperl2 epcomp.c epdom.h Log: Embperl 2 - DOM Revision Changes Path No revision No revision 1.17.2.3 +0 -25 embperl/embperl.h Index: embperl.h =================================================================== RCS file: /home/cvs/embperl/embperl.h,v retrieving revision 1.17.2.2 retrieving revision 1.17.2.3 diff -u -r1.17.2.2 -r1.17.2.3 --- embperl.h 2000/05/03 14:03:49 1.17.2.2 +++ embperl.h 2000/05/08 18:16:27 1.17.2.3 @@ -189,31 +189,6 @@ extern pid_t nPid ; -/* - Node Types -*/ - -enum tNodeType - { - ntypCDATA = 1, - ntypTag = 2, - ntypStartTag = 3, - ntypEndTag = 4, - ntypAttr = 5, - ntypAttrValue = 6, - } ; - - - -struct tNode - { - enum tNodeType nType ; - const char * sText ; - struct tNode * pFirstChild ; - struct tNode * pLastChild ; - struct tNode * pSibling ; - struct tNode * pParent ; - } ; 1.1.2.4 +348 -34 embperl/Attic/epdom.c Index: epdom.c =================================================================== RCS file: /home/cvs/embperl/Attic/epdom.c,v retrieving revision 1.1.2.3 retrieving revision 1.1.2.4 diff -u -r1.1.2.3 -r1.1.2.4 --- epdom.c 2000/05/04 05:42:02 1.1.2.3 +++ epdom.c 2000/05/08 18:16:27 1.1.2.4 @@ -15,8 +15,178 @@ #include "ep.h" #include "epmacro.h" +#include "epdom.h" +HV * pStringTableHash ; /* Hash to translate strings to index number */ +const char * * pStringTableArray ; /* Array with pointers to strings */ + +struct tNodeData * * pNodeLookup ; +struct tNodePad * * pPadLookup ; + +int nInitialNodePadSize = 64 ; + +int nMemUsage = 0 ; +int numNodes = 0 ; +int numAttr = 0 ; +int numStr = 0 ; +int numPads = 0 ; + +#define dom_malloc(s) (nMemUsage += s, malloc(s)) + +/* ------------------------------------------------------------------------ */ +/* */ +/* ArrayNew */ +/* */ +/* Create a new dynamic array */ +/* */ +/* ------------------------------------------------------------------------ */ + +int ArrayNew (/*in*/ const tArray * pArray, + /*in*/ int nAdd, + /*in*/ int nElementSize) + + + { + struct tArrayCtrl * pNew ; + + if ((pNew = dom_malloc (nAdd * nElementSize + sizeof (struct tArrayCtrl))) == NULL) + return 0 ; + + *(void * *)pArray = (struct tArray *)(pNew + 1) ; + pNew -> nMax = nAdd ; + pNew -> nAdd = nAdd ; + pNew -> nFill = 0 ; + pNew -> nElementSize = nElementSize ; + + return ok ; + } + + +/* ------------------------------------------------------------------------ */ +/* */ +/* ArrayAdd */ +/* */ +/* Make space for numElements in Array and return index of first one */ +/* */ +/* ------------------------------------------------------------------------ */ + + +int ArrayAdd (/*in*/ const tArray * pArray, + /*in*/ int numElements) + + { + struct tArrayCtrl * pCtrl = ((struct tArrayCtrl *)(*(void * *)pArray)) - 1 ; + int nNdx ; + + + if (pCtrl -> nFill + numElements > pCtrl -> nMax) + { + struct tArrayCtrl * pNew ; + int nNewMax = pCtrl -> nFill + numElements + pCtrl -> nAdd ; + + if ((pNew = realloc (pCtrl, nNewMax * pCtrl -> nElementSize + sizeof (struct tArrayCtrl))) == NULL) + return 0 ; + + *(void * *)pArray = (struct tArray *)(pNew + 1) ; + pNew -> nMax = nNewMax ; + pCtrl = pNew ; + } + + nNdx = pCtrl -> nFill ; + pCtrl -> nFill += numElements ; + return nNdx ; + } + + +/* ------------------------------------------------------------------------ */ +/* */ +/* String2Ndx */ +/* */ +/* Convert String to an unique index */ +/* */ +/* ------------------------------------------------------------------------ */ + + +int String2Ndx (/*in*/ const char * sText, + /*in*/ int nLen) + + { + SV * * ppSV ; + SV * pSVKey ; + SV * pSVNdx ; + HE * pHEKey ; + int nNdx ; + + if ((ppSV = hv_fetch (pStringTableHash, (char *)sText, nLen, 1)) == NULL) + return 0 ; + + if (*ppSV != NULL && SvTYPE (*ppSV) == SVt_IV) + return SvIV (*ppSV) ; + + /* new string */ + + nNdx = ArrayAdd (&pStringTableArray, 1) ; + + pSVNdx = newSViv (nNdx) ; + SvREFCNT_inc (pSVNdx) ; + pSVKey = newSVpvn ((char *)sText, nLen) ; + pHEKey = hv_store_ent (pStringTableHash, pSVKey, pSVNdx, 0) ; + + pStringTableArray[nNdx] = HeKEY (pHEKey) ; + + numStr++ ; + + return nNdx ; + } + +/* ------------------------------------------------------------------------ */ +/* */ +/* Ndx2String */ +/* */ +/* Get String from index */ +/* */ +/* ------------------------------------------------------------------------ */ + +const char * Ndx2String (/*in*/ int nNdx) + + { + return pStringTableArray[nNdx] ; + } + +/* ------------------------------------------------------------------------ */ +/* */ +/* DomInit */ +/* */ +/* */ +/* */ +/* ------------------------------------------------------------------------ */ + + +int DomInit (void) + + { + pStringTableHash = newHV () ; + + ArrayNew (&pStringTableArray, 128, sizeof (char *)) ; + String2Ndx ("", 0) ; + + ArrayNew (&pNodeLookup, 4096, sizeof (struct tNodeData *)) ; + ArrayAdd (&pNodeLookup, 1) ; + pNodeLookup[0] = NULL ; + ArrayNew (&pPadLookup, 256, sizeof (struct tNodePad *)) ; + ArrayAdd (&pPadLookup, 1) ; + pPadLookup[0] = NULL ; + } + + +int mydie (char * msg) + { + puts (msg) ; + exit (1) ; + } + + /* ------------------------------------------------------------------------ interface Node { @@ -63,56 +233,200 @@ */ +/* ------------------------------------------------------------------------ */ +/* */ +/* NewPad */ +/* */ +/* Create a new pad for storing nodes */ +/* */ +/* ------------------------------------------------------------------------ */ + +static struct tNodePad * NewPad (/*in*/ tIndex xParent) + + { + tIndex xNdx = ArrayAdd (&pPadLookup, 1) ; + struct tNodePad * pChilds ; + + if ((pPadLookup [xNdx] = pChilds = dom_malloc (nInitialNodePadSize)) == NULL) + return NULL ; + + pChilds -> nFill = sizeof (struct tNodePad) ; + pChilds -> nMax = nInitialNodePadSize ; + pChilds -> numChilds = 0 ; + pChilds -> xParent = xParent ; + pChilds -> xNdx = xNdx ; + pChilds -> xFirst = xNdx ; + pChilds -> xNext = 0 ; + pChilds -> xPrev = 0 ; + pChilds -> xLast = 0 ; + + + numPads++ ; + return pChilds ; + } + + /* ------------------------------------------------------------------------ */ /* */ -/* CreateNode */ +/* Node_appendChild */ /* */ -/* */ +/* Append a child node to a parent node */ /* */ /* ------------------------------------------------------------------------ */ + -int CreateNode (/*i/o*/ register req * r, - /*in*/ enum tNodeType nType, - /*in*/ const char * sText, - /*in*/ int nTextLen, - /*in*/ struct tNode * pParent, - /*in*/ int nLevel, - /*out*/ struct tNode * * pNewNode) +tNode Node_appendChild (/*i/o*/ register req * r, + /*in*/ tNodeType nType, + /*in*/ const char * sText, + /*in*/ int nTextLen, + /*in*/ tNode xParent, + /*in*/ int nLevel) { - struct tNode * pNode ; + struct tNodeData * pParent = pNodeLookup [xParent] ; + + if (r -> bDebug & dbgParse) + lprintf (r, "[%d]PARSE: AddNode: +%02d parent=%d %*s type=%d text=%*.*s\n", r -> nPid, nLevel, xParent, nLevel * 2, "", nType, nTextLen, nTextLen, sText) ; - if ((pNode = _malloc (r, sizeof (struct tNode))) == NULL) - return rcOutOfMemory ; + if (nType == ntypAttr) + { + struct tNodePad * pPad = (struct tNodePad * )(((tUInt8 *)pParent) - pParent -> nPadNdx) ; + struct tAttrData * pNew = ((struct tAttrData * )(pParent + 1)) + pParent -> numAttr ; + if (((tUInt8 *)pNew) - ((tUInt8 *)pPad) != pPad -> nFill) + { /* not last child in pad -> make room */ + mydie ("not last child in pad -> make room") ; - pNode -> nType = nType ; - pNode -> sText = _ep_memdup (r, sText, nTextLen) ; - pNode -> pParent = pParent ; - pNode -> pSibling = NULL ; - pNode -> pFirstChild = NULL ; - pNode -> pLastChild = NULL ; - if (pParent) + } + if (((tUInt8 *)pNew) - ((tUInt8 *)pPad) + sizeof (struct tAttrData) > pPad -> nMax) + { /* pad full -> move into new pad */ + struct tNodePad * pNext = NewPad (pPad -> xParent) ; + int nNodeSize = ((tUInt8 *)pNew) - ((tUInt8 *)pParent) ; + struct tNodeData * pNewParent = (struct tNodeData * )(pNext + 1) ; + lprintf (r, "[%d]PARSE: NewPad Extent Attr: %d -> %d\n", r -> nPid, pPad -> xParent, pNext -> xNdx) ; + + pNext -> xPrev = pPad -> xNdx ; + pPad -> xNext = pNext -> xNdx ; + pPadLookup[pPad->xFirst] -> xLast = pNext -> xNdx ; + pNext -> xFirst = pPad->xFirst ; + + memcpy (pNewParent, pParent, nNodeSize) ; + pNewParent -> nPadNdx = pNext -> nFill ; + pNodeLookup [xParent] = pParent = pNewParent ; + pNew = ((struct tAttrData * )(pParent + 1)) + pParent -> numAttr ; + pPad -> nFill -= nNodeSize ; + pNext -> nFill += nNodeSize ; + pPad = pNext ; + } + + pNew -> nName = String2Ndx (sText, nTextLen) ; + pNew -> nValue = 0 ; + pParent -> numAttr++ ; + pPad -> nFill += sizeof (struct tAttrData) ; + numAttr++ ; + return 1 ; + } + else if (nType == ntypAttrValue) + { + struct tAttrData * pNew = ((struct tAttrData * )pParent + 1) + (pParent -> numAttr - 1); + pNew -> nValue = String2Ndx (sText, nTextLen) ; + return 1 ; + } + else { - if (pParent -> pLastChild) + struct tNodePad * pChilds ; + struct tNodeData * pNew ; + int xNdx ; + + if (!pParent || !pParent -> xChilds) { - pParent -> pLastChild -> pSibling = pNode ; - pParent -> pLastChild = pNode ; + pChilds = NewPad (xParent) ; + lprintf (r, "[%d]PARSE: NewPad NewChild: %d -> %d\n", r -> nPid, xParent, pChilds -> xNdx) ; + + if (pParent) + pParent -> xChilds = pChilds -> xNdx ; } - else + else { - pParent -> pFirstChild = pNode ; - pParent -> pLastChild = pNode ; + struct tNodePad * pFirst = pChilds = pPadLookup [pParent -> xChilds] ; + if (pChilds -> xLast) + pChilds = pPadLookup [pChilds -> xLast] ; + + if (pChilds -> nFill + sizeof (struct tNodeData) > pChilds -> nMax) + { /* pad full -> make room */ + struct tNodePad * pNext = NewPad (xParent) ; + lprintf (r, "[%d]PARSE: NewPad Extent Childs: %d -> %d\n", r -> nPid, xParent, pNext -> xNdx) ; + + pNext -> xPrev = pChilds -> xNdx ; + pChilds -> xNext = pNext -> xNdx ; + pFirst -> xLast = pNext -> xNdx ; + pNext -> xFirst = pFirst -> xFirst ; + + pChilds = pNext ; + } } - + + pNew = (struct tNodeData *)(((tUInt8 *)pChilds) + pChilds -> nFill) ; + + xNdx = ArrayAdd (&pNodeLookup, 1) ; + pNodeLookup[xNdx] = pNew ; + + pNew -> nText = String2Ndx (sText, nTextLen) ; + pNew -> nType = nType ; + pNew -> numAttr = 0 ; + pNew -> xNdx = xNdx ; + pNew -> xChilds = 0 ; + pNew -> nPadNdx = pChilds -> nFill ; + + pChilds -> numChilds++ ; + pChilds -> nFill += sizeof (struct tNodeData) ; + numNodes++ ; + return xNdx ; } - - if (r -> bDebug & dbgParse) - lprintf (r, "[%d]PARSE: AddNode: +%02d %*s type=%d text=%s\n", r -> nPid, nLevel, nLevel * 2, "", nType, pNode -> sText) ; - - *pNewNode = pNode ; - - return ok ; - } \ No newline at end of file + } + +/* ------------------------------------------------------------------------ */ +/* */ +/* Node_parentNode */ +/* */ +/* Get parent node */ +/* */ +/* ------------------------------------------------------------------------ */ + + +tNode Node_parentNode (/*in*/ tNode xNode) + + { + struct tNodeData * pNode = pNodeLookup [xNode] ; + struct tNodePad * pPad = (struct tNodePad * )(((tUInt8 *)pNode) - pNode -> nPadNdx) ; + lprintf (pCurrReq, "[%d]PARSE: parentNode: %d -> %d\n", pCurrReq -> nPid, xNode, pPad -> xParent) ; + return pPad -> xParent ; + } + + +/* ------------------------------------------------------------------------ */ +/* */ +/* Node_nodeName */ +/* */ +/* Get name of node */ +/* */ +/* ------------------------------------------------------------------------ */ + + int i ; + struct tNodePad * p ; + const char * s ; + +const char * Node_nodeName (/*in*/ tNode xNode) + + { + i = pNodeLookup [xNode] -> nText ; + p = pPadLookup [pNodeLookup [xNode] -> xChilds] ; + s = pStringTableArray[pNodeLookup [xNode] -> nText] ; + return s ; + } + + + + 1.1.2.7 +38 -23 embperl/Attic/epparse.c Index: epparse.c =================================================================== RCS file: /home/cvs/embperl/Attic/epparse.c,v retrieving revision 1.1.2.6 retrieving revision 1.1.2.7 diff -u -r1.1.2.6 -r1.1.2.7 --- epparse.c 2000/05/04 05:42:03 1.1.2.6 +++ epparse.c 2000/05/08 18:16:28 1.1.2.7 @@ -15,6 +15,7 @@ #include "ep.h" #include "epmacro.h" +#include "epdom.h" typedef unsigned char tCharMap [256/(sizeof(unsigned char)*8)] ; @@ -310,7 +311,7 @@ const char * pParentContains, enum tNodeType nCDataType, const char * pParentNodeName, - struct tNode * pParentNode, + tNode xParentNode, int level) { @@ -319,18 +320,18 @@ int nEndText = sEndText?strlen (sEndText):0 ; char * pCurr = *ppCurr ; char * pCurrStart = pCurr ; - struct tNode *pNewNode ; + tNode xNewNode ; int rc ; while (pCurr < pEnd) { + struct tToken * pToken = NULL ; if (pStartChars [*pCurr >> 3] & 1 << (*pCurr & 7)) { - struct tToken * pToken = NULL ; - struct tTokenTable * pNextTokenTab = pTokenTable ; - const char * pNodeName = NULL ; - enum tNodeType nNodeType = 0 ; - char * pCurrTokenStart = pCurr ; + struct tTokenTable * pNextTokenTab = pTokenTable ; + const char * pNodeName = NULL ; + enum tNodeType nNodeType = 0 ; + char * pCurrTokenStart = pCurr ; do @@ -389,8 +390,8 @@ if (pCurrStart < pCurrTokenStart) { if (nCDataType) - if ((rc = CreateNode (r, nCDataType, pCurrStart, pCurrTokenStart - pCurrStart, pParentNode, level, &pNewNode)) != ok) - return rc ; + if (!(xNewNode = Node_appendChild (r, nCDataType, pCurrStart, pCurrTokenStart - pCurrStart, xParentNode, level))) + return 1 ; pCurrStart = pCurrTokenStart ; } @@ -403,42 +404,48 @@ if (pEndCurr) pCurr = pEndCurr + strlen (pToken -> sEndText) ; level-- ; - pParentNode = pParentNode -> pParent ; + xParentNode = Node_parentNode (xParentNode) ; } else { - if ((rc = CreateNode (r, pToken -> nNodeType, pNodeName, strlen (pNodeName), pParentNode, level, &pNewNode)) != ok) + if (!(xNewNode = Node_appendChild (r, pToken -> nNodeType, pNodeName, strlen (pNodeName), xParentNode, level))) return rc ; if (pInside = pToken -> pInside) { - ParseTokens (r, &pCurr, pEnd, pInside, pToken -> sEndText, pToken -> pContains, pToken -> nCDataType, pNodeName, pNewNode, level+1) ; + ParseTokens (r, &pCurr, pEnd, pInside, pToken -> sEndText, pToken -> pContains, pToken -> nCDataType, pNodeName, xNewNode, level+1) ; } else { char * pEndCurr ; unsigned char * pContains ; + int nSkip ; if ((pContains = pToken -> pContains)) { pEndCurr = pCurr ; while (pContains [*pEndCurr >> 3] & (1 << (*pEndCurr & 7))) pEndCurr++ ; + nSkip = 0 ; } else + { pEndCurr = strstr (pCurr, pToken -> sEndText) ; + nSkip = strlen (pToken -> sEndText) ; + } + if (pEndCurr) { if (pEndCurr - pCurr && pToken -> nCDataType) - if ((rc = CreateNode (r, pToken -> nCDataType, pCurr, pEndCurr - pCurr, pNewNode, level+1, &pNewNode)) != ok) - return rc ; - pCurr = pEndCurr + strlen (pToken -> sEndText) ; + if (!(xNewNode = Node_appendChild (r, pToken -> nCDataType, pCurr, pEndCurr - pCurr, xNewNode, level+1))) + return 1 ; + pCurr = pEndCurr + nSkip ; } } if (pToken -> nNodeType == ntypStartTag) { level++ ; - pParentNode = pNewNode ; + xParentNode = xNewNode ; } } pCurrStart = pCurr ; @@ -447,20 +454,20 @@ if (pParentContains && ((pParentContains [*pCurr >> 3] & 1 << (*pCurr & 7)) == 0) ) { if (pCurr - pCurrStart && nCDataType) - if ((rc = CreateNode (r, nCDataType, pCurrStart, pCurr - pCurrStart, pParentNode, level, &pNewNode)) != ok) - return rc ; + if (!(xNewNode = Node_appendChild (r, nCDataType, pCurrStart, pCurr - pCurrStart, xParentNode, level))) + return 1 ; break ; } else if (sEndText == NULL || (*pCurr == *sEndText && strncmp (pCurr, sEndText, nEndText) == 0)) { if (pCurr - pCurrStart && nCDataType) - if ((rc = CreateNode (r, nCDataType, pCurrStart, pCurr - pCurrStart, pParentNode, level, &pNewNode)) != ok) - return rc ; + if (!(xNewNode = Node_appendChild (r, nCDataType, pCurrStart, pCurr - pCurrStart, xParentNode, level))) + return 1 ; pCurr += nEndText ; break ; } - else + else if (!pToken) pCurr++ ; } @@ -489,8 +496,10 @@ char * sTokenHash = "HTML::Embperl::Tokens::Main" ; HV * pTokenHash ; int rc ; - + tNode xDocNode ; + DomInit () ; + if ((pTokenHash = perl_get_hv ((char *)sTokenHash, TRUE)) == NULL) { return rcHashError ; @@ -500,8 +509,14 @@ LogError (r, rc) ; return rc ; } + + if (!(xDocNode = Node_appendChild (r, ntypTag, "attr", 3, 0, 0))) + return 1 ; - return ParseTokens (r, &pStart, pEnd, &pTable, "", NULL, ntypCDATA, "root", NULL, 0) ; + if (!(xDocNode = Node_appendChild (r, ntypTag, "doc", 3, 0, 0))) + return 1 ; + + return ParseTokens (r, &pStart, pEnd, &pTable, "", NULL, ntypCDATA, "root", xDocNode, 0) ; } No revision No revision 1.1.2.1 +26 -0 embperl/Attic/epcomp.c 1.1.2.1 +100 -0 embperl/Attic/epdom.h No revision No revision 1.9.2.3 +1 -0 embperl/test/html/input.htm Index: input.htm =================================================================== RCS file: /home/cvs/embperl/test/html/input.htm,v retrieving revision 1.9.2.2 retrieving revision 1.9.2.3 diff -u -r1.9.2.2 -r1.9.2.3 --- input.htm 2000/05/04 05:42:06 1.9.2.2 +++ input.htm 2000/05/08 18:16:28 1.9.2.3 @@ -1,3 +1,4 @@ +