manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1598300 - in /manifoldcf/branches/CONNECTORS-946/framework: agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/ agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ agents/src/main/java/org/apache/manifoldcf/agent...
Date Thu, 29 May 2014 14:16:47 GMT
Author: kwright
Date: Thu May 29 14:16:47 2014
New Revision: 1598300

URL: http://svn.apache.org/r1598300
Log:
Put code that is common between output and transformation connectors into common interface

Added:
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java   (with props)
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java   (with props)
Removed:
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ITransformationActivity.java
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/TransformationSpecification.java
Modified:
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ITransformationConnector.java
    manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
    manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java

Modified: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1598300&r1=1598299&r2=1598300&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Thu May 29 14:16:47 2014
@@ -214,7 +214,7 @@ public class IncrementalIngester extends
       throw new ServiceInterruption("Output connector not installed",0L);
     try
     {
-      return connector.checkMimeTypeIndexable(outputDescription,mimeType);
+      return connector.checkMimeTypeIndexable(outputDescription,mimeType,null);
     }
     finally
     {
@@ -239,7 +239,7 @@ public class IncrementalIngester extends
       throw new ServiceInterruption("Output connector not installed",0L);
     try
     {
-      return connector.checkDocumentIndexable(outputDescription,localFile);
+      return connector.checkDocumentIndexable(outputDescription,localFile,null);
     }
     finally
     {
@@ -265,7 +265,7 @@ public class IncrementalIngester extends
       throw new ServiceInterruption("Output connector not installed",0L);
     try
     {
-      return connector.checkLengthIndexable(outputDescription,length);
+      return connector.checkLengthIndexable(outputDescription,length,null);
     }
     finally
     {
@@ -291,7 +291,7 @@ public class IncrementalIngester extends
       throw new ServiceInterruption("Output connector not installed",0L);
     try
     {
-      return connector.checkURLIndexable(outputDescription,url);
+      return connector.checkURLIndexable(outputDescription,url,null);
     }
     finally
     {
@@ -315,7 +315,7 @@ public class IncrementalIngester extends
       throw new ServiceInterruption("Output connector not installed",0L);
     try
     {
-      return connector.getOutputDescription(spec);
+      return connector.getPipelineDescription(spec);
     }
     finally
     {

Modified: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java?rev=1598300&r1=1598299&r2=1598300&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java Thu May 29 14:16:47 2014
@@ -37,4 +37,11 @@ public interface IOutputAddActivity exte
   public String qualifyAccessToken(String authorityNameString, String accessToken)
     throws ManifoldCFException;
 
+  /** Send a document via the pipeline to the next output connection.
+  *@param document is the document data to be processed (handed to the output data store).
+  *@return the document status (accepted or permanently rejected); return codes are listed in IPipelineConnector.
+  */
+  public int sendDocument(RepositoryDocument document)
+    throws ManifoldCFException, ServiceInterruption;
+
 }

Added: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java?rev=1598300&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java (added)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java Thu May 29 14:16:47 2014
@@ -0,0 +1,66 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+
+import java.io.*;
+
+/** This interface abstracts from the activities that a transformation connector can do
+when checking a document.
+*/
+public interface IOutputCheckActivity
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param mimeType is the mime type of the document.
+  *@return true if the mime type can be accepted by the downstream connection.
+  */
+  public boolean checkMimeTypeIndexable(String mimeType)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Pre-determine whether a document (passed here as a File object) is acceptable downstream.  This method is
+  * used to determine whether a document needs to be actually transferred.  This hook is provided mainly to support
+  * search engines that only handle a small set of accepted file types.
+  *@param localFile is the local file to check.
+  *@return true if the file is acceptable by the downstream connection.
+  */
+  public boolean checkDocumentIndexable(File localFile)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Pre-determine whether a document's length is acceptable downstream.  This method is used
+  * to determine whether to fetch a document in the first place.
+  *@param length is the length of the document.
+  *@return true if the file is acceptable by the downstream connection.
+  */
+  public boolean checkLengthIndexable(long length)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Pre-determine whether a document's URL is acceptable downstream.  This method is used
+  * to help filter out documents that cannot be indexed in advance.
+  *@param url is the URL of the document.
+  *@return true if the file is acceptable by the downstream connection.
+  */
+  public boolean checkURLIndexable(String url)
+    throws ManifoldCFException, ServiceInterruption;
+
+}

Propchange: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java?rev=1598300&r1=1598299&r2=1598300&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputConnector.java Thu May 29 14:16:47 2014
@@ -46,17 +46,10 @@ import java.util.*;
 * Connector Framework is concerned).
 *
 */
-public interface IOutputConnector extends IConnector
+public interface IOutputConnector extends IPipelineConnector
 {
   public static final String _rcsid = "@(#)$Id: IOutputConnector.java 998081 2010-09-17 11:33:15Z kwright $";
 
-  // Document statuses
-
-  /** Document accepted */
-  public final static int DOCUMENTSTATUS_ACCEPTED = 0;
-  /** Document permanently rejected */
-  public final static int DOCUMENTSTATUS_REJECTED = 1;
-
   /** Return a list of activities that this connector generates.
   * The connector does NOT need to be connected before this method is called.
   *@return the set of activities.
@@ -73,75 +66,6 @@ public interface IOutputConnector extend
   public boolean requestInfo(Configuration output, String command)
     throws ManifoldCFException;
     
-    
-  /** Get an output version string, given an output specification.  The output version string is used to uniquely describe the pertinent details of
-  * the output specification and the configuration, to allow the Connector Framework to determine whether a document will need to be output again.
-  * Note that the contents of the document cannot be considered by this method, and that a different version string (defined in IRepositoryConnector)
-  * is used to describe the version of the actual document.
-  *
-  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
-  * necessary.
-  *@param spec is the current output specification for the job that is doing the crawling.
-  *@return a string, of unlimited length, which uniquely describes output configuration and specification in such a way that if two such strings are equal,
-  * the document will not need to be sent again to the output data store.
-  */
-  public String getOutputDescription(OutputSpecification spec)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
-  * unusable documents that will be passed to this output connector.
-  *@param outputDescription is the document's output version.
-  *@param mimeType is the mime type of the document.
-  *@return true if the mime type is indexable by this connector.
-  */
-  public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Pre-determine whether a document (passed here as a File object) is indexable by this connector.  This method is used by participating
-  * repository connectors to help reduce the number of unmanageable documents that are passed to this output connector in advance of an
-  * actual transfer.  This hook is provided mainly to support search engines that only handle a small set of accepted file types.
-  *@param outputDescription is the document's output version.
-  *@param localFile is the local file to check.
-  *@return true if the file is indexable.
-  */
-  public boolean checkDocumentIndexable(String outputDescription, File localFile)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that are too long to be indexable.
-  *@param outputDescription is the document's output version.
-  *@param length is the length of the document.
-  *@return true if the file is indexable.
-  */
-  public boolean checkLengthIndexable(String outputDescription, long length)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that are not worth indexing.
-  *@param outputDescription is the document's output version.
-  *@param url is the URL of the document.
-  *@return true if the file is indexable.
-  */
-  public boolean checkURLIndexable(String outputDescription, String url)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Add (or replace) a document in the output data store using the connector.
-  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
-  * necessary.
-  * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
-  * output description, since that was what was partly used to determine if output should be taking place.  So it may be necessary for this method to decode
-  * an output description string in order to determine what should be done.
-  *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
-  * and serve the document.  This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
-  *@param outputDescription is the description string that was constructed for this document by the getOutputDescription() method.
-  *@param document is the document data to be processed (handed to the output data store).
-  *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document.  May be null.
-  *@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
-  *@return the document status (accepted or permanently rejected).
-  */
-  public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Remove a document using the connector.
   * Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
   *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
@@ -167,69 +91,8 @@ public interface IOutputConnector extend
   public void noteAllRecordsRemoved()
     throws ManifoldCFException;
 
-  // UI support methods.
-  //
-  // These support methods come in two varieties.  The first bunch (inherited from IConnector) is involved in setting up connection configuration information.
-  // The second bunch
-  // is involved in presenting and editing output specification information for a job.  The two kinds of methods are accordingly treated differently,
-  // in that the first bunch cannot assume that the current connector object is connected, while the second bunch can.  That is why the first bunch
-  // receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect()
-  // method, above).
-    
-  /** Output the specification header section.
-  * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
-  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
-  *@param out is the output to which any HTML should be sent.
-  *@param locale is the preferred local of the output.
-  *@param os is the current output specification for this job.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
-  */
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os,
-    int connectionSequenceNumber, List<String> tabsArray)
-    throws ManifoldCFException, IOException;
-  
-  /** Output the specification body section.
-  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
-  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
-  * form is "editjob".
-  *@param out is the output to which any HTML should be sent.
-  *@param locale is the preferred local of the output.
-  *@param os is the current output specification for this job.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@param actualSequenceNumber is the connection within the job that has currently been selected.
-  *@param tabName is the current tab name.
-  */
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os,
-    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
-    throws ManifoldCFException, IOException;
-  
-  /** Process a specification post.
-  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
-  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
-  * The name of the posted form is "editjob".
-  *@param variableContext contains the post data, including binary file-upload information.
-  *@param locale is the preferred local of the output.
-  *@param os is the current output specification for this job.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
-  */
-  public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os,
-    int connectionSequenceNumber)
-    throws ManifoldCFException;
-  
-  /** View specification.
-  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
-  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
-  *@param out is the output to which any HTML should be sent.
-  *@param locale is the preferred local of the output.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@param os is the current output specification for this job.
-  */
-  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os,
-    int connectionSequenceNumber)
-    throws ManifoldCFException, IOException;
-  
+  // UI support methods are inherited from IConnector and IPipelineConnector.
+ 
 }
 
 

Added: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java?rev=1598300&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java (added)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java Thu May 29 14:16:47 2014
@@ -0,0 +1,184 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+
+import java.io.*;
+import java.util.*;
+
+/** This interface describes an instance of a connector which can live in a chained processing pipeline.
+* Both transformation connectors and output connectors are expected to extend this interface.
+*
+* Pipeline connectors have two basic functions:
+* (1) Processing documents, and optionally passing them to the next pipeline stage;
+* (2) Determining if a document is acceptable, optionally by querying the next pipeline stage.
+*
+*/
+public interface IPipelineConnector extends IConnector
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  // Document statuses
+
+  /** Document accepted */
+  public final static int DOCUMENTSTATUS_ACCEPTED = 0;
+  /** Document permanently rejected */
+  public final static int DOCUMENTSTATUS_REJECTED = 1;
+
+  /** Get a pipeline version string, given a pipeline specification object.  The version string is used to
+  * uniquely describe the pertinent details of the specification and the configuration, to allow the Connector 
+  * Framework to determine whether a document will need to be processed again.
+  * Note that the contents of any document cannot be considered by this method; only configuration and specification information
+  * can be considered.
+  *
+  * This method presumes that the underlying connector object has been configured.
+  *@param spec is the current pipeline specification object for this connection for the job that is doing the crawling.
+  *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
+  * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
+  */
+  public String getPipelineDescription(OutputSpecification spec)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param mimeType is the mime type of the document.
+  *@param checkActivity is an object including the activities that can be performed by this method.
+  *@return true if the mime type can be accepted by this connector.
+  */
+  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Pre-determine whether a document (passed here as a File object) is acceptable or not.  This method is
+  * used to determine whether a document needs to be actually transferred.  This hook is provided mainly to support
+  * search engines that only handle a small set of accepted file types.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param localFile is the local file to check.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Pre-determine whether a document's length is acceptable.  This method is used
+  * to determine whether to fetch a document in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param length is the length of the document.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Pre-determine whether a document's URL is acceptable.  This method is used
+  * to help filter out documents that cannot be indexed in advance.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param url is the URL of the document.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Add (or replace) a document in the output data store using the connector.
+  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
+  * necessary.
+  * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
+  * output description, since that was what was partly used to determine if output should be taking place.  So it may be necessary for this method to decode
+  * an output description string in order to determine what should be done.
+  *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
+  * and serve the document.  This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
+  *@param pipelineDescription is the description string that was constructed for this document by the getPipelineDescription() method.
+  *@param document is the document data to be processed (handed to the output data store).
+  *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document.  May be null.
+  *@param activities is the handle to an object that the implementer of a pipeline connector may use to perform operations, such as logging processing activity,
+  * or sending a modified document to the next stage in the pipeline.
+  *@return the document status (accepted or permanently rejected).
+  */
+  public int addOrReplaceDocument(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+    throws ManifoldCFException, ServiceInterruption;
+
+  // UI support methods.
+  //
+  // These support methods come in two varieties.  The first bunch (inherited from IConnector) is involved in setting up connection configuration information.
+  // The second bunch
+  // is involved in presenting and editing pipeline specification information for a connection within a job.  The two kinds of methods are accordingly treated differently,
+  // in that the first bunch cannot assume that the current connector object is connected, while the second bunch can.  That is why the first bunch
+  // receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect()
+  // method, above).
+    
+  /** Output the specification header section.
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
+  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
+  */
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException;
+  
+  /** Output the specification body section.
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the connection within the job that has currently been selected.
+  *@param tabName is the current tab name.
+  */
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException;
+  
+  /** Process a specification post.
+  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the pipeline specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException;
+  
+  /** View specification.
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
+  */
+  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException;
+  
+}
+
+

Propchange: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ITransformationConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ITransformationConnector.java?rev=1598300&r1=1598299&r2=1598300&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ITransformationConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ITransformationConnector.java Thu May 29 14:16:47 2014
@@ -45,17 +45,10 @@ import java.util.*;
 * being sent to an output connection.
 *
 */
-public interface ITransformationConnector extends IConnector
+public interface ITransformationConnector extends IPipelineConnector
 {
   public static final String _rcsid = "@(#)$Id$";
 
-  // Document statuses
-
-  /** Document accepted */
-  public final static int DOCUMENTSTATUS_ACCEPTED = IOutputConnector.DOCUMENTSTATUS_ACCEPTED;
-  /** Document permanently rejected */
-  public final static int DOCUMENTSTATUS_REJECTED = IOutputConnector.DOCUMENTSTATUS_REJECTED;
-
   /** Return a list of activities that this connector generates.
   * The connector does NOT need to be connected before this method is called.
   *@return the set of activities.
@@ -72,142 +65,8 @@ public interface ITransformationConnecto
   public boolean requestInfo(Configuration output, String command)
     throws ManifoldCFException;
     
-    
-  /** Get a transformation version string, given a transformation specification.  The transformation version string is used to
-  * uniquely describe the pertinent details of the transformation specification and the configuration, to allow the Connector 
-  * Framework to determine whether a document will need to be processed again.
-  * Note that the contents of the document cannot be considered by this method; only configuration and specification information
-  * can be considered.
-  *
-  * This method presumes that the connector object has been configured, and it is thus able to communicate with the transformation
-  * engine should that be necessary.
-  *@param spec is the current transformation specification for the job that is doing the crawling.
-  *@return a string, of unlimited length, which uniquely describes translation configuration and specification in such a way that
-  * if two such strings are equal, a document will not need to be translated again .
-  */
-  public String getTranslationDescription(TransformationSpecification spec)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Detect if a mime type is transformable or not.  This method is used to determine whether it makes sense to fetch a document
-  * in the first place.
-  *@param transformationDescription is the document's translation version.
-  *@param mimeType is the mime type of the document.
-  *@param checkActivity is an object including the activities that can be done by this method.
-  *@return true if the mime type can be accepted by this connector.
-  */
-  public boolean checkMimeTypeIndexable(String transformationDescription, String mimeType, ITransformationCheckActivity checkActivity)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Pre-determine whether a document (passed here as a File object) is transformable by this connector.  This method is
-  * used to determine whether a document needs to be actually transferred.  This hook is provided mainly to support
-  * search engines that only handle a small set of accepted file types.
-  *@param transformationDescription is the document's transformation version.
-  *@param localFile is the local file to check.
-  *@param checkActivity is an object including the activities that can be done by this method.
-  *@return true if the file is indexable.
-  */
-  public boolean checkDocumentIndexable(String transformationDescription, File localFile, ITransformationCheckActivity checkActivity)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Pre-determine whether a document's length is transformable by this connector.  This method is used
-  * to determine whether to fetch a document in the first place.
-  *@param transformationDescription is the document's transformation version.
-  *@param length is the length of the document.
-  *@param checkActivity is an object including the activities that can be done by this method.
-  *@return true if the file is indexable.
-  */
-  public boolean checkLengthIndexable(String transformationDescription, long length, ITransformationCheckActivity checkActivity)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Pre-determine whether a document's URL is transformable by this connector.  This method is used
-  * to help filter out documents that cannot be indexed in advance.
-  *@param transformationDescription is the document's transformation version.
-  *@param url is the URL of the document.
-  *@param checkActivity is an object including the activities that can be done by this method.
-  *@return true if the file is indexable.
-  */
-  public boolean checkURLIndexable(String transformationDescription, String url, ITransformationCheckActivity checkActivity)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Transform a document using the connector.
-  * This method presumes that the connector object has been configured, and it is thus able to communicate with the
-  * transformation engine should that be necessary.
-  * The TransformationSpecification is *not* provided to this method, because the goal is consistency, and if output is done it
-  * must be consistent with the transformation description, since that was what was partly used to determine if
-  * transformation should be taking place.  So it may be necessary for this method to decode
-  * an transformation description string in order to determine what should be done.
-  *@param transformationDescription is the description string that was constructed for this document by the getTransformationDescription() method.
-  *@param documentURI is the URI of the document.  This is for transformation purposes only.
-  *@param document is the document data to be transformed (handed to the transformation engine).
-  *@param activities is the handle to an object that the implementer of a transformation connector may use to perform operations, such as
-  * sending the transformed document downstream for further processing, or logging processing activity.
-  *@return the document status (accepted or permanently rejected).
-  */
-  public int transformDocument(String transformationDescription, String documentURI, RepositoryDocument document, ITransformationActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  // UI support methods.
+  // UI support methods are inherited from IConnector and IPipelineConnector.
   //
-  // These support methods come in two varieties.  The first bunch (inherited from IConnector) is involved in setting up connection configuration information.
-  // The second bunch
-  // is involved in presenting and editing transformation specification information for a job.  The two kinds of methods are accordingly treated differently,
-  // in that the first bunch cannot assume that the current connector object is connected, while the second bunch can.  That is why the first bunch
-  // receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect()
-  // method, above).
-    
-  /** Output the specification header section.
-  * This method is called in the head section of a job page which has selected a transformation connection of the current type.  Its purpose is to add the required tabs
-  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
-  *@param out is the output to which any HTML should be sent.
-  *@param locale is the preferred local of the output.
-  *@param os is the current output specification for this job.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
-  */
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, TransformationSpecification os,
-    int connectionSequenceNumber, List<String> tabsArray)
-    throws ManifoldCFException, IOException;
-  
-  /** Output the specification body section.
-  * This method is called in the body section of a job page which has selected a transformation connection of the current type.  Its purpose is to present the required form elements for editing.
-  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
-  * form is "editjob".
-  *@param out is the output to which any HTML should be sent.
-  *@param locale is the preferred local of the output.
-  *@param os is the current output specification for this job.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@param actualSequenceNumber is the connection within the job that has currently been selected.
-  *@param tabName is the current tab name.
-  */
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, TransformationSpecification os,
-    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
-    throws ManifoldCFException, IOException;
-  
-  /** Process a specification post.
-  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
-  * posted.  Its purpose is to gather form information and modify the transformation specification accordingly.
-  * The name of the posted form is "editjob".
-  *@param variableContext contains the post data, including binary file-upload information.
-  *@param locale is the preferred local of the output.
-  *@param os is the current output specification for this job.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
-  */
-  public String processSpecificationPost(IPostParameters variableContext, Locale locale, TransformationSpecification os,
-    int connectionSequenceNumber)
-    throws ManifoldCFException;
-  
-  /** View specification.
-  * This method is called in the body section of a job's view page.  Its purpose is to present the transformation specification information to the user.
-  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
-  *@param out is the output to which any HTML should be sent.
-  *@param locale is the preferred local of the output.
-  *@param connectionSequenceNumber is the unique number of this connection within the job.
-  *@param os is the current output specification for this job.
-  */
-  public void viewSpecification(IHTTPOutput out, Locale locale, TransformationSpecification os,
-    int connectionSequenceNumber)
-    throws ManifoldCFException, IOException;
   
 }
 

Modified: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java?rev=1598300&r1=1598299&r2=1598300&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java Thu May 29 14:16:47 2014
@@ -81,13 +81,26 @@ public abstract class BaseOutputConnecto
     // The base implementation does nothing here.
   }
 
+  /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param mimeType is the mime type of the document.
+  *@param checkActivity is an object including the activities that can be performed by this method.
+  *@return true if the mime type can be accepted by this connector.
+  */
+  @Override
+  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    return checkMimeTypeIndexable(pipelineDescription, mimeType);
+  }
+
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
   * unusable documents that will be passed to this output connector.
   *@param outputDescription is the document's output version.
   *@param mimeType is the mime type of the document.
   *@return true if the mime type is indexable by this connector.
   */
-  @Override
   public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -105,6 +118,21 @@ public abstract class BaseOutputConnecto
     return true;
   }
 
+  /** Pre-determine whether a document (passed here as a File object) is acceptable or not.  This method is
+  * used to determine whether a document needs to be actually transferred.  This hook is provided mainly to support
+  * search engines that only handle a small set of accepted file types.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param localFile is the local file to check.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  @Override
+  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    return checkDocumentIndexable(pipelineDescription, localFile);
+  }
+
   /** Pre-determine whether a document (passed here as a File object) is indexable by this connector.  This method is used by participating
   * repository connectors to help reduce the number of unmanageable documents that are passed to this output connector in advance of an
   * actual transfer.  This hook is provided mainly to support search engines that only handle a small set of accepted file types.
@@ -112,7 +140,6 @@ public abstract class BaseOutputConnecto
   *@param localFile is the local file to check.
   *@return true if the file is indexable.
   */
-  @Override
   public boolean checkDocumentIndexable(String outputDescription, File localFile)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -131,32 +158,76 @@ public abstract class BaseOutputConnecto
     return true;
   }
 
+  /** Pre-determine whether a document's length is acceptable.  This method is used
+  * to determine whether to fetch a document in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param length is the length of the document.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the document length is acceptable, false if not.
+  */
+  @Override
+  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    return checkLengthIndexable(pipelineDescription, length);
+  }
+
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
   * to help filter out documents that are too long to be indexable.
   *@param outputDescription is the document's output version.
   *@param length is the length of the document.
   *@return true if the file is indexable.
   */
-  @Override
   public boolean checkLengthIndexable(String outputDescription, long length)
     throws ManifoldCFException, ServiceInterruption
   {
     return true;
   }
 
+  /** Pre-determine whether a document's URL is acceptable.  This method is used
+  * to help filter out, in advance, documents that cannot be indexed.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param url is the URL of the document.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the URL is acceptable, false if not.
+  */
+  @Override
+  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    return checkURLIndexable(pipelineDescription, url);
+  }
+
   /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
   * to help filter out documents that are not worth indexing.
   *@param outputDescription is the document's output version.
   *@param url is the URL of the document.
   *@return true if the file is indexable.
   */
-  @Override
   public boolean checkURLIndexable(String outputDescription, String url)
     throws ManifoldCFException, ServiceInterruption
   {
     return true;
   }
 
+  /** Get a pipeline version string, given a pipeline specification object.  The version string is used to
+  * uniquely describe the pertinent details of the specification and the configuration, to allow the Connector 
+  * Framework to determine whether a document will need to be processed again.
+  * Note that the contents of any document cannot be considered by this method; only configuration and specification information
+  * can be considered.
+  *
+  * This method presumes that the underlying connector object has been configured.
+  *@param spec is the current pipeline specification object for this connection for the job that is doing the crawling.
+  *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
+  * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
+  */
+  @Override
+  public String getPipelineDescription(OutputSpecification spec)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    return getOutputDescription(spec);
+  }
+
   /** Get an output version string, given an output specification.  The output version string is used to uniquely describe the pertinent details of
   * the output specification and the configuration, to allow the Connector Framework to determine whether a document will need to be output again.
   * Note that the contents of the document cannot be considered by this method, and that a different version string (defined in IRepositoryConnector)
@@ -168,7 +239,6 @@ public abstract class BaseOutputConnecto
   *@return a string, of unlimited length, which uniquely describes output configuration and specification in such a way that if two such strings are equal,
   * the document will not need to be sent again to the output data store.
   */
-  @Override
   public String getOutputDescription(OutputSpecification spec)
     throws ManifoldCFException, ServiceInterruption
   {

Modified: manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1598300&r1=1598299&r2=1598300&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/branches/CONNECTORS-946/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Thu May 29 14:16:47 2014
@@ -2485,6 +2485,17 @@ public class WorkerThread extends Thread
           return URLEncoder.encode(authorityNameString) + ":" + URLEncoder.encode(accessToken);
     }
 
+    /** Send a document via the pipeline to the next connection.
+    *@param document is the document data to be processed (handed to the output data store).
+    *@return the document status (accepted or permanently rejected); return codes are listed in IPipelineConnector.
+    */
+    public int sendDocument(RepositoryDocument document)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      // No downstream connection at output connection level.
+      return IPipelineConnector.DOCUMENTSTATUS_REJECTED;
+    }
+
   }
 
 }



Mime
View raw message