manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1840122 - in /manifoldcf/trunk/connectors/webcrawler/connector/src/main: java/org/apache/manifoldcf/crawler/connectors/webcrawler/ native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/ resources/org/apache/manifoldcf/crawler/co...
Date Wed, 05 Sep 2018 11:38:11 GMT
Author: kwright
Date: Wed Sep  5 11:38:11 2018
New Revision: 1840122

URL: http://svn.apache.org/viewvc?rev=1840122&view=rev
Log:
CONNECTORS-1528: Add a "lowercase" canonicalization mode, and get the tab partly functioning again,
other than a second Add and a Delete, which still do not work.

Modified:
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
Wed Sep  5 11:38:11 2018
@@ -170,6 +170,8 @@ public class WebcrawlerConfig
   public static final String ATTR_PHPSESSIONREMOVAL = "phpsessionremoval";
   /** bvsessionremoval attribute */
   public static final String ATTR_BVSESSIONREMOVAL = "bvsessionremoval";
+  /** map to lower case */
+  public static final String ATTR_LOWERCASE = "lowercase";
   /** name attribute */
   public static final String ATTR_NAME = "name";
   /** token attribute */

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Wed Sep  5 11:38:11 2018
@@ -2385,7 +2385,7 @@ public class WebcrawlerConnector extends
           allowReorderOutput = Messages.getBodyString(locale, "WebcrawlerConnector.no");
         }
         else
-          allowReorderOutput = allowReorder;
+          allowReorderOutput = allowReorder.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale, "WebcrawlerConnector.yes");
         String allowJavaSessionRemoval = specNode.getAttributeValue(WebcrawlerConfig.ATTR_JAVASESSIONREMOVAL);
         String allowJavaSessionRemovalOutput;
         if (allowJavaSessionRemoval == null || allowJavaSessionRemoval.length() == 0)
@@ -2394,7 +2394,7 @@ public class WebcrawlerConnector extends
           allowJavaSessionRemovalOutput = Messages.getBodyString(locale, "WebcrawlerConnector.no");
         }
         else
-          allowJavaSessionRemovalOutput = allowJavaSessionRemoval;
+          allowJavaSessionRemovalOutput = allowJavaSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale, "WebcrawlerConnector.yes");;
         String allowASPSessionRemoval = specNode.getAttributeValue(WebcrawlerConfig.ATTR_ASPSESSIONREMOVAL);
         String allowASPSessionRemovalOutput;
         if (allowASPSessionRemoval == null || allowASPSessionRemoval.length() == 0)
@@ -2403,7 +2403,7 @@ public class WebcrawlerConnector extends
           allowASPSessionRemovalOutput = Messages.getBodyString(locale, "WebcrawlerConnector.no");
         }
         else
-          allowASPSessionRemovalOutput = allowASPSessionRemoval;
+          allowASPSessionRemovalOutput = allowASPSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale, "WebcrawlerConnector.yes");;
         String allowPHPSessionRemoval = specNode.getAttributeValue(WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL);
         String allowPHPSessionRemovalOutput;
         if (allowPHPSessionRemoval == null || allowPHPSessionRemoval.length() == 0)
@@ -2412,7 +2412,7 @@ public class WebcrawlerConnector extends
           allowPHPSessionRemovalOutput = Messages.getBodyString(locale, "WebcrawlerConnector.no");
         }
         else
-          allowPHPSessionRemovalOutput = allowPHPSessionRemoval;
+          allowPHPSessionRemovalOutput = allowPHPSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale, "WebcrawlerConnector.yes");;
         String allowBVSessionRemoval = specNode.getAttributeValue(WebcrawlerConfig.ATTR_BVSESSIONREMOVAL);
         String allowBVSessionRemovalOutput;
         if (allowBVSessionRemoval == null || allowBVSessionRemoval.length() == 0)
@@ -2421,7 +2421,16 @@ public class WebcrawlerConnector extends
           allowBVSessionRemovalOutput = Messages.getBodyString(locale, "WebcrawlerConnector.no");
         }
         else
-          allowBVSessionRemovalOutput = allowBVSessionRemoval;
+          allowBVSessionRemovalOutput = allowBVSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale, "WebcrawlerConnector.yes");;
+        String allowLowercasing = specNode.getAttributeValue(WebcrawlerConfig.ATTR_LOWERCASE);
+        String allowLowercasingOutput;
+        if (allowLowercasing == null || allowLowercasing.length() == 0)
+        {
+          allowLowercasing = WebcrawlerConfig.ATTRVALUE_NO;
+          allowLowercasingOutput = Messages.getBodyString(locale, "WebcrawlerConnector.no");
+        }
+        else
+          allowLowercasingOutput = allowLowercasing.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale, "WebcrawlerConnector.yes");;
 
         canonicalizationMap.put("regexpString",regexpString);
         canonicalizationMap.put("description",description);
@@ -2435,7 +2444,9 @@ public class WebcrawlerConnector extends
         canonicalizationMap.put("allowPHPSessionRemovalOutput",allowPHPSessionRemovalOutput);
         canonicalizationMap.put("allowBVSessionRemoval",allowBVSessionRemoval);
         canonicalizationMap.put("allowBVSessionRemovalOutput",allowBVSessionRemovalOutput);
-
+        canonicalizationMap.put("allowLowercasing",allowLowercasing);
+        canonicalizationMap.put("allowLowercasingOutput",allowLowercasingOutput);
+        
         canonicalizationMapList.add(canonicalizationMap);
       }
     }
@@ -2937,6 +2948,7 @@ public class WebcrawlerConnector extends
           String aspSession = variableContext.getParameter(seqPrefix+"urlregexpasp_"+Integer.toString(j));
           String phpSession = variableContext.getParameter(seqPrefix+"urlregexpphp_"+Integer.toString(j));
           String bvSession = variableContext.getParameter(seqPrefix+"urlregexpbv_"+Integer.toString(j));
+          String lowercasing = variableContext.getParameter(seqPrefix+"urlregexplowercasing_"+Integer.toString(j));
           SpecificationNode newSn = new SpecificationNode(WebcrawlerConfig.NODE_URLSPEC);
           newSn.setAttribute(WebcrawlerConfig.ATTR_REGEXP,regexp);
           if (regexpDescription != null && regexpDescription.length() > 0)
@@ -2951,6 +2963,8 @@ public class WebcrawlerConnector extends
             newSn.setAttribute(WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL,phpSession);
           if (bvSession != null && bvSession.length() > 0)
             newSn.setAttribute(WebcrawlerConfig.ATTR_BVSESSIONREMOVAL,bvSession);
+          if (lowercasing != null && lowercasing.length() > 0)
+            newSn.setAttribute(WebcrawlerConfig.ATTR_LOWERCASE,lowercasing);
           ds.addChild(ds.getChildCount(),newSn);
         }
         j++;
@@ -2964,6 +2978,7 @@ public class WebcrawlerConnector extends
         String aspSession = variableContext.getParameter(seqPrefix+"urlregexpasp");
         String phpSession = variableContext.getParameter(seqPrefix+"urlregexpphp");
         String bvSession = variableContext.getParameter(seqPrefix+"urlregexpbv");
+        String lowercasing = variableContext.getParameter(seqPrefix+"urlregexplowercasing");
 
         // Add a new node at the end
         SpecificationNode newSn = new SpecificationNode(WebcrawlerConfig.NODE_URLSPEC);
@@ -2980,6 +2995,8 @@ public class WebcrawlerConnector extends
           newSn.setAttribute(WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL,phpSession);
         if (bvSession != null && bvSession.length() > 0)
           newSn.setAttribute(WebcrawlerConfig.ATTR_BVSESSIONREMOVAL,bvSession);
+        if (lowercasing != null && lowercasing.length() > 0)
+          newSn.setAttribute(WebcrawlerConfig.ATTR_LOWERCASE,lowercasing);
         ds.addChild(ds.getChildCount(),newSn);
       }
     }
@@ -3656,6 +3673,11 @@ public class WebcrawlerConnector extends
     // Put it back into the URL without the ref, and with the modified query and path parts.
     url = new WebURL(url.getScheme(),url.getHost(),url.getPort(),pathString,queryString);
     String rval = url.toASCIIString();
+    // Here is where we decide to bash to lowercase, if so indicated
+    if (p != null && p.canLowercase())
+    {
+      rval = rval.toLowerCase(Locale.ROOT);
+    }
     return rval;
   }
 
@@ -5383,9 +5405,10 @@ public class WebcrawlerConnector extends
     protected final boolean removeAspSession;
     protected final boolean removePhpSession;
     protected final boolean removeBVSession;
+    protected final boolean lowercasing;
 
     public CanonicalizationPolicy(Pattern matchPattern, boolean reorder, boolean removeJavaSession,
boolean removeAspSession,
-      boolean removePhpSession, boolean removeBVSession)
+      boolean removePhpSession, boolean removeBVSession, boolean lowercasing)
     {
       this.matchPattern = matchPattern;
       this.reorder = reorder;
@@ -5393,6 +5416,7 @@ public class WebcrawlerConnector extends
       this.removeAspSession = removeAspSession;
       this.removePhpSession = removePhpSession;
       this.removeBVSession = removeBVSession;
+      this.lowercasing = lowercasing;
     }
 
     public boolean checkMatch(String url)
@@ -5426,6 +5450,11 @@ public class WebcrawlerConnector extends
       return removeBVSession;
     }
 
+    public boolean canLowercase()
+    {
+      return lowercasing;
+    }
+
   }
 
   /** Class representing a list of canonicalization rules */
@@ -5738,10 +5767,20 @@ public class WebcrawlerConnector extends
           {
             bvSessionValue = bvSession.equals(WebcrawlerConfig.ATTRVALUE_YES);
           }
+
+          String lowercasing = sn.getAttributeValue(WebcrawlerConfig.ATTR_LOWERCASE);
+          boolean lowercasingValue;
+          if (lowercasing == null)
+            lowercasingValue = false;
+          else
+          {
+            lowercasingValue = lowercasing.equals(WebcrawlerConfig.ATTRVALUE_YES);
+          }
+          
           try
           {
             canonicalizationPolicies.addRule(new CanonicalizationPolicy(Pattern.compile(urlRegexp),reorderValue,javaSessionValue,aspSessionValue,
-              phpSessionValue, bvSessionValue));
+              phpSessionValue, bvSessionValue, lowercasingValue));
           }
           catch (java.util.regex.PatternSyntaxException e)
           {

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
Wed Sep  5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=Trus
 WebcrawlerConnector.NoTrustCertificates=No trust certificates
 WebcrawlerConnector.Description=Description
 WebcrawlerConnector.Reorder=Reorder?
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
 WebcrawlerConnector.RemoveJSPSessions=Remove JSP sessions?
 WebcrawlerConnector.RemoveASPSessions=Remove ASP sessions?
 WebcrawlerConnector.RemovePHPSessions=Remove PHP sessions?

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
Wed Sep  5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=La c
 WebcrawlerConnector.NoTrustCertificates=No hay certificados de confianza
 WebcrawlerConnector.Description=Descripción
 WebcrawlerConnector.Reorder=reordenar?
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
 WebcrawlerConnector.RemoveJSPSessions=Retire sesiones JSP?
 WebcrawlerConnector.RemoveASPSessions=Retire sesiones ASP?
 WebcrawlerConnector.RemovePHPSessions=Retire las sesiones de PHP?

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
Wed Sep  5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=Fair
 WebcrawlerConnector.NoTrustCertificates=Aucun certificat de confiance
 WebcrawlerConnector.Description=Description
 WebcrawlerConnector.Reorder=Reordonner (reorder)?
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
 WebcrawlerConnector.RemoveJSPSessions=Retirer les sessions JSP?
 WebcrawlerConnector.RemoveASPSessions=Retirer les sessions ASP?
 WebcrawlerConnector.RemovePHPSessions=Retirer les sessions PHP?

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
Wed Sep  5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=
 WebcrawlerConnector.NoTrustCertificates=トラストサーティフィケートがありません
 WebcrawlerConnector.Description=説明
 WebcrawlerConnector.Reorder=ソート
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
 WebcrawlerConnector.RemoveJSPSessions=JSPセッションを削除
 WebcrawlerConnector.RemoveASPSessions=ASPセッションを削除
 WebcrawlerConnector.RemovePHPSessions=PHPセッションを削除

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
Wed Sep  5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=
 WebcrawlerConnector.NoTrustCertificates=无信任证书
 WebcrawlerConnector.Description=説明
 WebcrawlerConnector.Reorder=重排
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
 WebcrawlerConnector.RemoveJSPSessions=删除JSP会话
 WebcrawlerConnector.RemoveASPSessions=删除ASP会话
 WebcrawlerConnector.RemovePHPSessions=删除PHP会话

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
Wed Sep  5 11:38:11 2018
@@ -30,6 +30,7 @@
       <th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.URLRegularExpression'))</th>
       <th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.Description'))</th>
       <th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.Reorder'))</th>
+      <th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.MapToLowercase'))</th>
       <th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemoveJSPSessions'))</th>
       <th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemoveASPSessions'))</th>
       <th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemovePHPSessions'))</th>
@@ -41,21 +42,23 @@
         <button class="btn btn-danger btn-xs" type="button" title="$Encoder.attributeEscape($ResourceBundle.getString('WebcrawlerConnector.DeleteUrlRegexp'))$Encoder.attributeEscape($canonicalizationMap["regexpString"])"
onclick='javascript:${SEQPREFIX}URLRegexpDelete($foreach.index,"${SEQPREFIX}urlregexp_${foreach.index}");'><i
class="fa fa-minus-circle fa-fw" aria-hidden="true"></i>$Encoder.attributeEscape($ResourceBundle.getString('WebcrawlerConnector.Delete'))</button>
       </td>
       <td>
-        <input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap["regexpString"])"/>
-        <input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap["description"])"/>
-        <input type="hidden" name="${SEQPREFIX}urlregexpreorder_$foreach.index" value="$canonicalizationMap["allowReorder"]"/>
-        <input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index" value="$canonicalizationMap["allowJavaSessionRemoval"]"/>
-        <input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index" value="$canonicalizationMap["allowASPSessionRemoval"]"/>
-        <input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index" value="$canonicalizationMap["allowPHPSessionRemoval"]"/>
-        <input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index" value="$canonicalizationMap["allowBVSessionRemoval"]"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap['regexpString'])"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap['description'])"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexpreorder_$foreach.index" value="$canonicalizationMap['allowReorder']"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexplowercasing_$foreach.index" value="$canonicalizationMap['allowLowercasing']"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index" value="$canonicalizationMap['allowJavaSessionRemoval']"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index" value="$canonicalizationMap['allowASPSessionRemoval']"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index" value="$canonicalizationMap['allowPHPSessionRemoval']"/>
+        <input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index" value="$canonicalizationMap['allowBVSessionRemoval']"/>
         $Encoder.bodyEscape($canonicalizationMap["regexpString"])
       </td>
-      <td>$Encoder.bodyEscape($canonicalizationMap["description"])</td>
-      <td>$canonicalizationMap["allowReorderOutput"]</td>
-      <td>$canonicalizationMap["allowJavaSessionRemovalOutput"]</td>
-      <td>$canonicalizationMap["allowASPSessionRemovalOutput"]</td>
-      <td>$canonicalizationMap["allowPHPSessionRemovalOutput"]</td>
-      <td>$canonicalizationMap["allowBVSessionRemovalOutput"]</td>
+      <td>$Encoder.bodyEscape($canonicalizationMap['description'])</td>
+      <td>$canonicalizationMap['allowReorderOutput']</td>
+      <td>$canonicalizationMap['allowLowercasingOutput']</td>
+      <td>$canonicalizationMap['allowJavaSessionRemovalOutput']</td>
+      <td>$canonicalizationMap['allowASPSessionRemovalOutput']</td>
+      <td>$canonicalizationMap['allowPHPSessionRemovalOutput']</td>
+      <td>$canonicalizationMap['allowBVSessionRemovalOutput']</td>
     </tr>
     #end
   </table>
@@ -65,6 +68,7 @@
 <hr/>
 <div class="row">
   <div class="col-md-4">
+    <input type="hidden" name="${SEQPREFIX}urlregexpop" value="CONTINUE"/>
     <input type="hidden" name="${SEQPREFIX}urlregexpcount" value="$COUNTER"/>
     <div class="form-group">
       <label for="${SEQPREFIX}urlregexp">$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.URLRegularExpression'))</label>
@@ -82,6 +86,11 @@
       </div>
       <div class="checkbox">
         <label>
+          <input type="checkbox" name="${SEQPREFIX}urlregexplowercasing" value="yes"/>
$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.MapToLowercase'))
+        </label>
+      </div>
+      <div class="checkbox">
+        <label>
           <input type="checkbox" name="${SEQPREFIX}urlregexpjava" value="yes" checked="true"/>
$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemoveJSPSessions'))
         </label>
       </div>
@@ -108,13 +117,14 @@
 </div>
 #else
   #foreach($canonicalizationMap in $CANONICALIZATIONMAPLIST)
-<input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap["regexpString"])"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap["description"])"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpreorder_$foreach.index" value="$canonicalizationMap["allowReorder"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index" value="$canonicalizationMap["allowJavaSessionRemoval"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index" value="$canonicalizationMap["allowASPSessionRemoval"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index" value="$canonicalizationMap["allowPHPSessionRemoval"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index" value="$canonicalizationMap["allowBVSessionRemoval"]"/>
+<input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap['regexpString'])"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index" value="$Encoder.attributeEscape($canonicalizationMap['description'])"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpreorder_$foreach.index" value="$canonicalizationMap['allowReorder']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexplowercasing_$foreach.index" value="$canonicalizationMap['allowLowercasing']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index" value="$canonicalizationMap['allowJavaSessionRemoval']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index" value="$canonicalizationMap['allowASPSessionRemoval']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index" value="$canonicalizationMap['allowPHPSessionRemoval']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index" value="$canonicalizationMap['allowBVSessionRemoval']"/>
   #end
   #set( $COUNTER = $CANONICALIZATIONMAPLIST.size())
 <input type="hidden" name="${SEQPREFIX}urlregexpcount" value="$COUNTER"/>

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
(original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
Wed Sep  5 11:38:11 2018
@@ -28,6 +28,7 @@
           <th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.URLRegexp"))</th>
           <th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.Description"))</th>
           <th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.Reorder"))</th>
+          <th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.MapToLowercase"))</th>
           <th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.RemoveJSPSessions"))</th>
           <th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.RemoveASPSessions"))</th>
           <th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.RemovePHPSessions"))</th>
@@ -38,6 +39,7 @@
           <td>$Encoder.bodyEscape($canonicalizationMap["regexpString"])</td>
           <td>$Encoder.bodyEscape($canonicalizationMap["description"])</td>
           <td>$canonicalizationMap["allowReorderOutput"]</td>
+          <td>$canonicalizationMap["allowLowercasingOutput"]</td>
           <td>$canonicalizationMap["allowJavaSessionRemovalOutput"]</td>
           <td>$canonicalizationMap["allowASPSessionRemovalOutput"]</td>
           <td>$canonicalizationMap["allowPHPSessionRemovalOutput"]</td>



Mime
View raw message