manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1598484 - in /manifoldcf/branches/CONNECTORS-946/framework: agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Date Fri, 30 May 2014 08:05:37 GMT
Author: kwright
Date: Fri May 30 08:05:37 2014
New Revision: 1598484

URL: http://svn.apache.org/r1598484
Log:
Make pipeline check available in IOutputAddActivity

Modified:
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java
    manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java

Modified: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java?rev=1598484&r1=1598483&r2=1598484&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java Fri May 30 08:05:37 2014
@@ -24,7 +24,7 @@ import org.apache.manifoldcf.agents.inte
 /** This interface abstracts from the activities that an output connector can do
 when adding or replacing documents.
 */
-public interface IOutputAddActivity extends IOutputHistoryActivity
+public interface IOutputAddActivity extends IOutputHistoryActivity,IOutputCheckActivity
 {
   public static final String _rcsid = "@(#)$Id: IOutputAddActivity.java 988245 2010-08-23 18:39:35Z kwright $";
 

Modified: manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1598484&r1=1598483&r2=1598484&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Fri May 30 08:05:37 2014
@@ -2496,6 +2496,51 @@ public class WorkerThread extends Thread
       return IPipelineConnector.DOCUMENTSTATUS_REJECTED;
     }
 
+    /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
+    * in the first place.
+    *@param mimeType is the mime type of the document.
+    *@return true if the mime type can be accepted by the downstream connection.
+    */
+    public boolean checkMimeTypeIndexable(String mimeType)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return false;
+    }
+
+    /** Pre-determine whether a document (passed here as a File object) is acceptable downstream.  This method is
+    * used to determine whether a document needs to be actually transferred.  This hook is provided mainly to support
+    * search engines that only handle a small set of accepted file types.
+    *@param localFile is the local file to check.
+    *@return true if the file is acceptable by the downstream connection.
+    */
+    public boolean checkDocumentIndexable(File localFile)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return false;
+    }
+
+    /** Pre-determine whether a document's length is acceptable downstream.  This method is used
+    * to determine whether to fetch a document in the first place.
+    *@param length is the length of the document.
+    *@return true if the file is acceptable by the downstream connection.
+    */
+    public boolean checkLengthIndexable(long length)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return false;
+    }
+
+    /** Pre-determine whether a document's URL is acceptable downstream.  This method is used
+    * to help filter out documents that cannot be indexed in advance.
+    *@param url is the URL of the document.
+    *@return true if the file is acceptable by the downstream connection.
+    */
+    public boolean checkURLIndexable(String url)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return false;
+    }
+
   }
 
 }



Mime
View raw message