incubator-connectors-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r994962 - /incubator/lcf/trunk/modules/connectors/webcrawler/connector/src/main/java/org/apache/acf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Date Wed, 08 Sep 2010 10:07:04 GMT
Author: kwright
Date: Wed Sep  8 10:07:04 2010
New Revision: 994962

URL: http://svn.apache.org/viewvc?rev=994962&view=rev
Log:
Add a default throttle via the UI on a new web connection, with reasonably restrictive throttling
parameters.  Hopefully this will cut down on the number of casual users that crawl the web
without any throttling.  CONNECTORS-102.

Modified:
    incubator/lcf/trunk/modules/connectors/webcrawler/connector/src/main/java/org/apache/acf/crawler/connectors/webcrawler/WebcrawlerConnector.java

Modified: incubator/lcf/trunk/modules/connectors/webcrawler/connector/src/main/java/org/apache/acf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/modules/connectors/webcrawler/connector/src/main/java/org/apache/acf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=994962&r1=994961&r2=994962&view=diff
==============================================================================
--- incubator/lcf/trunk/modules/connectors/webcrawler/connector/src/main/java/org/apache/acf/crawler/connectors/webcrawler/WebcrawlerConnector.java
(original)
+++ incubator/lcf/trunk/modules/connectors/webcrawler/connector/src/main/java/org/apache/acf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Wed Sep  8 10:07:04 2010
@@ -1642,6 +1642,7 @@ public class WebcrawlerConnector extends
   public void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out, ConfigParams
parameters, String tabName)
     throws ACFException, IOException
   {
+    
     String email = parameters.getParameter(org.apache.acf.crawler.connectors.webcrawler.WebcrawlerConfig.PARAMETER_EMAIL);
     if (email == null)
       email = "";
@@ -1782,6 +1783,41 @@ public class WebcrawlerConnector extends
         }
       }
 
+      // If it looks like this is a brand-new configuration, add in a default throttle.
+      // This only works because other nodes must get created on the first post, and cannot then be deleted.
+      if (parameters.getChildCount() == 0)
+      {
+        // Its prefix will be...
+        String prefix = "bandwidth_" + Integer.toString(binCounter);
+        out.print(
+"        <tr class=\""+(((binCounter % 2)==0)?"evenformrow":"oddformrow")+"\">\n"+
+"          <td class=\"formcolumncell\">\n"+
+"            <a name=\""+prefix+"\">\n"+
+"              <input type=\"button\" value=\"Delete\" alt=\""+"Delete bin regular expression
#"+Integer.toString(binCounter+1)+"\" onclick='javascript:deleteRegexp("+Integer.toString(binCounter)+");'/>\n"+
+"              <input type=\"hidden\" name=\""+"op_"+prefix+"\" value=\"Continue\"/>\n"+
+"              <input type=\"hidden\" name=\""+"regexp_"+prefix+"\" value=\"\"/>\n"+
+"            </a>\n"+
+"          </td>\n"+
+"          <td class=\"formcolumncell\">\n"+
+"            <nobr></nobr>\n"+
+"          </td>\n"+
+"          <td class=\"formcolumncell\">\n"+
+"            <nobr><input type=\"checkbox\" name=\"insensitive_"+prefix+"\" value=\"false\"/></nobr>\n"+
+"          </td>\n"+
+"          <td class=\"formcolumncell\">\n"+
+"            <nobr><input type=\"text\" size=\"5\" name=\"connections_"+prefix+"\"
value=\"2\"/></nobr>\n"+
+"          </td>\n"+
+"          <td class=\"formcolumncell\">\n"+
+"            <nobr><input type=\"text\" size=\"5\" name=\"rate_"+prefix+"\" value=\"64\"/></nobr>\n"+
+"          </td>\n"+
+"          <td class=\"formcolumncell\">\n"+
+"            <nobr><input type=\"text\" size=\"5\" name=\"fetches_"+prefix+"\" value=\"12\"/></nobr>\n"+
+"          </td>\n"+
+"        </tr>\n"
+        );
+        binCounter++;
+      }
+
       if (binCounter == 0)
       {
         out.print(
@@ -1868,6 +1904,23 @@ public class WebcrawlerConnector extends
           binCounter++;
         }
       }
+
+      // If it looks like this is a brand-new configuration, add in a default throttle.
+      // This only works because other nodes must get created on the first post, and cannot then be deleted.
+      if (parameters.getChildCount() == 0)
+      {
+        // Its prefix will be...
+        String prefix = "bandwidth_" + Integer.toString(binCounter);
+        out.print(
+"<input type=\"hidden\" name=\""+"regexp_"+prefix+"\" value=\"\"/>\n"+
+"<input type=\"hidden\" name=\""+"insensitive_"+prefix+"\" value=\"false\"/>\n"+
+"<input type=\"hidden\" name=\""+"connections_"+prefix+"\" value=\"2\"/>\n"+
+"<input type=\"hidden\" name=\""+"rate_"+prefix+"\" value=\"64\"/>\n"+
+"<input type=\"hidden\" name=\""+"fetches_"+prefix+"\" value=\"12\"/>\n"
+        );
+        binCounter++;
+      }
+
       out.print(
 "<input type=\"hidden\" name=\"bandwidth_count\" value=\""+binCounter+"\"/>\n"
       );



Mime
View raw message