manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1718194 - in /manifoldcf/trunk: CHANGES.txt framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java
Date Sun, 06 Dec 2015 17:56:29 GMT
Author: kwright
Date: Sun Dec  6 17:56:29 2015
New Revision: 1718194

URL: http://svn.apache.org/viewvc?rev=1718194&view=rev
Log:
Fix for CONNECTORS-1264.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1718194&r1=1718193&r2=1718194&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Sun Dec  6 17:56:29 2015
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 2.3-dev =====================
 
+CONNECTORS-1264: Fix handling of slashes in unquoted attribute values
+in html parsing.
+(Issei Nishigata, Karl Wright)
+
 CONNECTORS-1249: Independent priority setting for different
 connectors, and bring individual connectors up to speed with proper
 document bin names.

Modified: manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java?rev=1718194&r1=1718193&r2=1718194&view=diff
==============================================================================
--- manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java (original)
+++ manifoldcf/trunk/framework/connector-common/src/main/java/org/apache/manifoldcf/connectorcommon/fuzzyml/TagParseState.java Sun Dec  6 17:56:29 2015
@@ -75,7 +75,8 @@ public class TagParseState extends Singl
   protected static final int TAGPARSESTATE_IN_CDATA_BODY = 27;
   protected static final int TAGPARSESTATE_SAWRIGHTBRACKET = 28;
   protected static final int TAGPARSESTATE_SAWSECONDRIGHTBRACKET = 29;
-
+  protected static final int TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE_SAW_SLASH = 30;
+  
   protected int currentState = TAGPARSESTATE_NORMAL;
 
   /** The btag depth, which indicates btag behavior when > 0. */
@@ -724,6 +725,8 @@ public class TagParseState extends Singl
         currentState = TAGPARSESTATE_IN_SINGLE_QUOTES_ATTR_VALUE;
       else if (thisChar == '"')
         currentState = TAGPARSESTATE_IN_DOUBLE_QUOTES_ATTR_VALUE;
+      else if (thisChar == '/')
+        currentState = TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE_SAW_SLASH;
       else if (!isWhitespace(thisChar))
       {
         currentState = TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE;
@@ -864,9 +867,10 @@ public class TagParseState extends Singl
         currentValueBuffer.append(thisChar);
       break;
 
-    case TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE:
+    case TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE_SAW_SLASH:
       if (isWhitespace(thisChar))
       {
+        currentValueBuffer.append('/');
         currentAttrList.add(new AttrNameValue(currentAttrName,attributeDecode(currentValueBuffer.toString())));
         currentAttrName = null;
         currentValueBuffer = null;
@@ -875,10 +879,38 @@ public class TagParseState extends Singl
       }
       else if (thisChar == '/')
       {
+        currentValueBuffer.append('/');
+      }
+      else if (thisChar == '>')
+      {
         currentAttrList.add(new AttrNameValue(currentAttrName,attributeDecode(currentValueBuffer.toString())));
+        currentAttrName = null;
+        currentValueBuffer = null;
+        currentState = TAGPARSESTATE_NORMAL;
         if (noteTag(currentTagName,currentAttrList))
           return true;
-        currentState = TAGPARSESTATE_IN_TAG_SAW_SLASH;
+        currentTagName = null;
+        currentAttrList = null;
+      }
+      else
+      {
+        currentValueBuffer.append('/');
+        currentValueBuffer.append(thisChar);
+      }
+      break;
+
+    case TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE:
+      if (isWhitespace(thisChar))
+      {
+        currentAttrList.add(new AttrNameValue(currentAttrName,attributeDecode(currentValueBuffer.toString())));
+        currentAttrName = null;
+        currentValueBuffer = null;
+        currentState = TAGPARSESTATE_IN_ATTR_NAME;
+        currentAttrNameBuffer = newBuffer();
+      }
+      else if (thisChar == '/')
+      {
+        currentState = TAGPARSESTATE_IN_UNQUOTED_ATTR_VALUE_SAW_SLASH;
       }
       else if (thisChar == '>')
       {



Mime
View raw message