manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1309631 - in /incubator/lcf/trunk: ./ connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcraw...
Date Thu, 05 Apr 2012 00:18:51 GMT
Author: kwright
Date: Thu Apr  5 00:18:51 2012
New Revision: 1309631

URL: http://svn.apache.org/viewvc?rev=1309631&view=rev
Log:
Fix for CONNECTORS-430.  Committed on behalf of Erlend.

Modified:
    incubator/lcf/trunk/CHANGES.txt
    incubator/lcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
    incubator/lcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
    incubator/lcf/trunk/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py

Modified: incubator/lcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/CHANGES.txt?rev=1309631&r1=1309630&r2=1309631&view=diff
==============================================================================
--- incubator/lcf/trunk/CHANGES.txt (original)
+++ incubator/lcf/trunk/CHANGES.txt Thu Apr  5 00:18:51 2012
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 0.6-dev =====================
 
+CONNECTORS-430: An error should be returned if invalid seeds are typed
+into the seeds list for the web connector
+(Erlend Garåsen, Karl Wright)
+
 CONNECTORS-442: Fix the binary exclusion rules to not include target
 or the xml build files.
 (Karl Wright)

Modified: incubator/lcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1309631&r1=1309630&r2=1309631&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ incubator/lcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Thu Apr  5 00:18:51 2012
@@ -3378,6 +3378,11 @@ public class WebcrawlerConnector extends
 "    editjob.exclusions.focus();\n"+
 "    return false;\n"+
 "  }\n"+
+"  if (check_seedsList() == false)\n"+
+"  {\n"+
+"    editjob.seeds.focus();\n"+
+"    return false;\n"+
+"  }\n"+
 "  return true;\n"+
 "}\n"+
 "\n"+
@@ -3408,6 +3413,33 @@ public class WebcrawlerConnector extends
 "  return rval;\n"+
 "}\n"+
 "\n"+
+"function check_seedsList()\n"+
+"{\n"+
+"  var regexp = /http(s)?:\\/\\/([a-z0-9+!*(),;?&=\\$_.-]+(\\:[a-z0-9+!*(),;?&=\\$_.-]+)?@)?[a-z0-9+\\$_-]+(\\.[a-z0-9+\\$_-]+)*(\\:[0-9]{2,5})?(\\/([a-z0-9+\\$_-]\\.?)+)*\\/?(\\?[a-z+&\\$_.-][a-z0-9;:@\\/&%=+\\$_.-]*)?(#[a-z_.-][a-z0-9+\\$_.-]*)?/;\n"+
+"  var lines = editjob.seeds.value.split(\"\\n\");\n"+
+"  var trimmedUrlList = \"\";\n"+
+"  var invalidUrlList = \"\";\n"+
+"  var i = 0;\n"+
+"  while (i < lines.length)\n"+
+"  {\n"+
+"    var line = lines[i].replace(/^\\s*/, \"\").replace(/\\s*$/, \"\");\n"+
+"    if (line.length > 0)\n"+
+"    {\n"+
+"      if (!regexp.test(line))\n"+
+"        invalidUrlList = invalidUrlList + line + \"\\n\";\n"+
+"      trimmedUrlList = trimmedUrlList + line + \"\\n\";\n"+      
+"    }\n"+
+"    i = i + 1;\n"+
+"  }\n"+
+"  editjob.seeds.value = trimmedUrlList;\n"+ 
+"  if (invalidUrlList.length > 0)\n"+
+"  {\n"+
+"    alert(\""+Messages.getBodyJavascriptString(locale,"WebcrawlerConnector.InvalidUrlsInSeedsList")+"\\n\" + invalidUrlList);\n"+
+"    return false;\n"+
+"  }\n"+
+"  return true;\n"+
+"}\n"+
+"\n"+
 "function SpecAddToken(anchorvalue)\n"+
 "{\n"+
 "  if (editjob.spectoken.value == \"\")\n"+

Modified: incubator/lcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties?rev=1309631&r1=1309630&r2=1309631&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties (original)
+++ incubator/lcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties Thu Apr  5 00:18:51 2012
@@ -205,4 +205,4 @@ WebcrawlerConnector.ErrorWas=.  Error wa
 WebcrawlerConnector.TypeInAnAccessToken=Type in an access token
 WebcrawlerConnector.TypeInMetadataName=Type in metadata name
 WebcrawlerConnector.TypeInMetadataValue=Type in metadata value
-
+WebcrawlerConnector.InvalidUrlsInSeedsList=Invalid URLs in seeds list:

Modified: incubator/lcf/trunk/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py?rev=1309631&r1=1309630&r2=1309631&view=diff
==============================================================================
--- incubator/lcf/trunk/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py (original)
+++ incubator/lcf/trunk/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py Thu Apr  5 00:18:51 2012
@@ -305,6 +305,7 @@ class JSRegexpTestMethod( JSObject ):
             flags += re.MULTILINE
         if self.regexp.is_regexp_insensitive( ):
             flags += re.IGNORECASE
+        
         regexp = re.compile( self.regexp.get_regexp( ), flags )
 
         mo = regexp.match( testvalue )
@@ -1620,7 +1621,15 @@ class JSTokenStream:
                 if new_char == "\\":
                     self.start_index += 1
                     if self.start_index < len(self.body):
-                        the_string += self.body[ self.start_index ]
+                        the_char = self.body[ self.start_index ]
+                        # Deal with special characters
+                        if the_char == "n":
+                            the_char = "\n"
+                        elif the_char == "r":
+                            the_char = "\r"
+                        elif the_char == "t":
+                            the_char = "\t"
+                        the_string += the_char
                         self.start_index += 1
                 elif new_char == this_char:
                     # Maybe the end of the string, but we should zip past the whitespace



Mime
View raw message