Return-Path: Delivered-To: apmail-myfaces-dev-archive@www.apache.org Received: (qmail 92885 invoked from network); 23 Sep 2009 08:03:39 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 23 Sep 2009 08:03:39 -0000 Received: (qmail 89410 invoked by uid 500); 23 Sep 2009 08:03:38 -0000 Delivered-To: apmail-myfaces-dev-archive@myfaces.apache.org Received: (qmail 89299 invoked by uid 500); 23 Sep 2009 08:03:38 -0000 Mailing-List: contact dev-help@myfaces.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: "MyFaces Development" Delivered-To: mailing list dev@myfaces.apache.org Received: (qmail 89128 invoked by uid 99); 23 Sep 2009 08:03:38 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 23 Sep 2009 08:03:38 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.140] (HELO brutus.apache.org) (140.211.11.140) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 23 Sep 2009 08:03:36 +0000 Received: from brutus (localhost [127.0.0.1]) by brutus.apache.org (Postfix) with ESMTP id 1975A234C48D for ; Wed, 23 Sep 2009 01:03:16 -0700 (PDT) Message-ID: <1482690734.1253692996103.JavaMail.jira@brutus> Date: Wed, 23 Sep 2009 01:03:16 -0700 (PDT) From: "Lutz Ulruch (JIRA)" To: dev@myfaces.apache.org Subject: [jira] Updated: (TOMAHAWK-1458) ReducedHTMLParser: incorrect assumption about STATE_EXPECTING_ETAGO In-Reply-To: <210664162.1253692996053.JavaMail.jira@brutus> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 X-Virus-Checked: Checked by ClamAV on apache.org [ https://issues.apache.org/jira/browse/TOMAHAWK-1458?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Lutz Ulruch updated TOMAHAWK-1458: 
---------------------------------- Status: Open (was: Patch Available) > ReducedHTMLParser: incorrect assumption about STATE_EXPECTING_ETAGO > ------------------------------------------------------------------- > > Key: TOMAHAWK-1458 > URL: https://issues.apache.org/jira/browse/TOMAHAWK-1458 > Project: MyFaces Tomahawk > Issue Type: Bug > Components: ExtensionsFilter > Affects Versions: 1.1.9 > Reporter: Lutz Ulruch > > ReducedHTMLParser assumes that . This is not true. > Raw example: > > In this case, ReducedHTMLParser switches to STATE_READY when "" is handled. But the ", like in > ; }" > > Patch (my changes indicated by comments // L. Ulrich ...) > void parse() > { > int state = STATE_READY; > int currentTagStart = -1; > String currentTagName = null; > _lineNumber = 1; > _offset = 0; > int lastOffset = _offset -1; > > // L. Ulrich, 23.09.2009: > // New helper variable which holds the tag name > // in case of STATE_EXPECTING_ETAGO > String currentEtagoTagName = null; > > while (_offset < _seq.length()) > { > // Sanity check; each pass through this loop must increase the offset. > // Failure to do this means a hang situation has occurred. > if (_offset <= lastOffset) > { > // throw new RuntimeException("Infinite loop detected in ReducedHTMLParser"); > log.error("Infinite loop detected in ReducedHTMLParser; parsing skipped."+ > " Surroundings: '" + getTagSurroundings() +"'."); > //return; > } > lastOffset = _offset; > if (state == STATE_READY) > { > // in this state, nothing but "<" has any significance > consumeExcept("<"); > if (isFinished()) > { > break; > } > if (consumeMatch("")) > { > state = STATE_READY; > } > else > { > // false call; hyphen is not end of comment > consumeMatch("-"); > } > continue; > } > if (state == STATE_IN_TAG) > { > consumeWhitespace(); > if (consumeMatch("/>")) > { > // ok, end of element > state = STATE_READY; > closedTag(currentTagStart, _offset, currentTagName); > // and reset vars just in case... 
> currentTagStart = -1; > currentTagName = null; > } > else if (consumeMatch(">")) > { > if (currentTagName.equalsIgnoreCase("script") > || currentTagName.equalsIgnoreCase("style")) > { > // We've just started a special tag which can contain anything except > // the ETAGO marker ("</"). See > // http://www.w3.org/TR/REC-html40/appendix/notes.html#notes-specifying-data > state = STATE_EXPECTING_ETAGO; > > // L. Ulrich, 23.09.2009: > // set currentEtagoTagName > currentEtagoTagName = currentTagName; > } > else > { > state = STATE_READY; > } > // end of open tag, but not end of element > openedTag(currentTagStart, _offset, currentTagName); > // and reset vars just in case... > currentTagStart = -1; > currentTagName = null; > } > else > { > // xml attribute > String attrName = consumeAttrName(); > if (attrName == null) > { > // Oops, we found something quite unexpected in this tag. > // The best we can do is probably to drop back to looking > // for "/>", though that does risk us misinterpreting the > // contents of an attribute's associated string value. > log.warn("Invalid tag found: unexpected input while looking for attr name or '/>'" > + " at line " + getCurrentLineNumber()+". 
"+ > "Surroundings: '" + getTagSurroundings() +"'."); > state = STATE_EXPECTING_ETAGO; > // and consume one character > ++_offset; > } > else > { > consumeWhitespace(); > // html can have "stand-alone" attributes with no following equals sign > if (consumeMatch("=")) > { > consumeAttrValue(); > } > } > } > continue; > } > if (state == STATE_IN_MARKED_SECTION) > { > // in this state, nothing but "]]>" has any significance > consumeExcept("]"); > if (isFinished()) > { > break; > } > if (consumeMatch("]]>")) > { > state = STATE_READY; > } > else > { > // false call; ] is not end of cdata section > consumeMatch("]"); > } > continue; > } > if (state == STATE_EXPECTING_ETAGO) > { > // The term "ETAGO" is the official spec term for "</". > consumeExcept("<"); > if (isFinished()) > { > log.debug("Malformed input page; input terminated while tag not closed."); > break; > } > if (consumeMatch("</")) > { > // L. Ulrich, 23.09.2009: > // Workaround to skip other tags used within scripts: > // Test if the closed tag refers to currentEtagoTagName. > // Example: > // > // => do not treat as the script closing tag > // > // Note that this will still not work as expected > // in case of recursive tags. > // Example: > // "; ... } > CharSequence str = this._seq.subSequence(this._offset, > this._offset + currentEtagoTagName.length()); > if (str.toString().equals(currentEtagoTagName)) > { > if (!processEndTag()) > { > return; > } > state = STATE_READY; > currentEtagoTagName = null; > } > } > else > { > // false call; < does not start an ETAGO > consumeMatch("<"); > } > continue; > } > } > } -- This message is automatically generated by JIRA. - You can reply to this email to add a comment to the issue online.