manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1608109 [2/2] - in /manifoldcf/trunk: connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/ connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation...
Date Sat, 05 Jul 2014 18:53:02 GMT
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java Sat Jul  5 18:53:01 2014
@@ -72,16 +72,7 @@ public interface IIncrementalIngester
   *@param spec is the output specification.
   *@return the description string.
   */
-  @Deprecated
-  public String getOutputDescription(String outputConnectionName, OutputSpecification spec)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Get an output version string for a document.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param spec is the output specification.
-  *@return the description string.
-  */
-  public String getOutputDescription(String outputConnectionName, Specification spec)
+  public VersionContext getOutputDescription(String outputConnectionName, Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Get transformation version string for a document.
@@ -89,7 +80,7 @@ public interface IIncrementalIngester
   *@param spec is the transformation specification.
   *@return the description string.
   */
-  public String getTransformationDescription(String transformationConnectionName, Specification spec)
+  public VersionContext getTransformationDescription(String transformationConnectionName, Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Check if a mime type is indexable.
@@ -104,16 +95,6 @@ public interface IIncrementalIngester
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Check if a mime type is indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param mimeType is the mime type to check.
-  *@return true if the mimeType is indexable.
-  */
-  @Deprecated
-  public boolean checkMimeTypeIndexable(String outputConnectionName, String outputDescription, String mimeType)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Check if a file is indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param localFile is the local file to check.
@@ -126,16 +107,6 @@ public interface IIncrementalIngester
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Check if a file is indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param localFile is the local file to check.
-  *@return true if the local file is indexable.
-  */
-  @Deprecated
-  public boolean checkDocumentIndexable(String outputConnectionName, String outputDescription, File localFile)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
   * to help filter out documents that are too long to be indexable.
   *@param pipelineSpecification is the pipeline specification.
@@ -149,17 +120,6 @@ public interface IIncrementalIngester
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that are too long to be indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param length is the length of the document.
-  *@return true if the file is indexable.
-  */
-  @Deprecated
-  public boolean checkLengthIndexable(String outputConnectionName, String outputDescription, long length)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
   * to help filter out documents that not indexable.
   *@param pipelineSpecification is the pipeline specification.
@@ -173,17 +133,6 @@ public interface IIncrementalIngester
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that not indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param url is the url of the document.
-  *@return true if the file is indexable.
-  */
-  @Deprecated
-  public boolean checkURLIndexable(String outputConnectionName, String outputDescription, String url)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Determine whether we need to fetch or refetch a document.
   * Pass in information including the pipeline specification with existing version info, plus new document and parameter version strings.
   * If no outputs need to be updated, then this method will return false.  If any outputs need updating, then true is returned.
@@ -203,23 +152,6 @@ public interface IIncrementalIngester
   /** Record a document version, but don't ingest it.
   * The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
   * ServiceInterruption is thrown if this action must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param documentVersion is the document version.
-  *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
-  *@param activities is the object used in case a document needs to be removed from the output index as the result of this operation.
-  */
-  @Deprecated
-  public void documentRecord(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    String documentVersion, long recordTime,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Record a document version, but don't ingest it.
-  * The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
-  * ServiceInterruption is thrown if this action must be rescheduled.
   *@param pipelineSpecificationBasic is the basic pipeline specification needed.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
@@ -239,64 +171,6 @@ public interface IIncrementalIngester
   * method also REMOVES ALL OLD METADATA.  When complete, the index will contain only the metadata
   * described by the RepositoryDocument object passed to this method.
   * ServiceInterruption is thrown if the document ingestion must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param documentVersion is the document version.
-  *@param outputVersion is the output version string constructed from the output specification by the output connector.
-  *@param authorityName is the name of the authority associated with the document, if any.
-  *@param data is the document data.  The data is closed after ingestion is complete.
-  *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
-  *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
-  *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
-  *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
-  */
-  @Deprecated
-  public boolean documentIngest(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    String documentVersion,
-    String outputVersion,
-    String authorityName,
-    RepositoryDocument data,
-    long ingestTime, String documentURI,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Ingest a document.
-  * This ingests the document, and notes it.  If this is a repeat ingestion of the document, this
-  * method also REMOVES ALL OLD METADATA.  When complete, the index will contain only the metadata
-  * described by the RepositoryDocument object passed to this method.
-  * ServiceInterruption is thrown if the document ingestion must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param documentVersion is the document version.
-  *@param parameterVersion is the forced parameter version.
-  *@param outputVersion is the output version string constructed from the output specification by the output connector.
-  *@param authorityName is the name of the authority associated with the document, if any.
-  *@param data is the document data.  The data is closed after ingestion is complete.
-  *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
-  *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
-  *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
-  *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
-  */
-  @Deprecated
-  public boolean documentIngest(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    String documentVersion,
-    String outputVersion,
-    String parameterVersion,
-    String authorityName,
-    RepositoryDocument data,
-    long ingestTime, String documentURI,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Ingest a document.
-  * This ingests the document, and notes it.  If this is a repeat ingestion of the document, this
-  * method also REMOVES ALL OLD METADATA.  When complete, the index will contain only the metadata
-  * described by the RepositoryDocument object passed to this method.
-  * ServiceInterruption is thrown if the document ingestion must be rescheduled.
   *@param pipelineSpecificationWithVersions is the pipeline specification with already-fetched output versioning information.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
@@ -323,32 +197,6 @@ public interface IIncrementalIngester
 
   /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
   * versions agreed).
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes are the set of document identifier hashes.
-  *@param checkTime is the time at which the check took place, in milliseconds since epoch.
-  */
-  @Deprecated
-  public void documentCheckMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes,
-    long checkTime)
-    throws ManifoldCFException;
-
-  /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
-  * versions agreed).
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param checkTime is the time at which the check took place, in milliseconds since epoch.
-  */
-  @Deprecated
-  public void documentCheck(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    long checkTime)
-    throws ManifoldCFException;
-
-  /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
-  * versions agreed).
   *@param pipelineSpecificationBasic is a pipeline specification.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
   *@param identifierHashes are the set of document identifier hashes.
@@ -374,42 +222,6 @@ public interface IIncrementalIngester
     throws ManifoldCFException;
 
   /** Delete multiple documents from the search engine index.
-  *@param outputConnectionNames are the names of the output connections associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is tha array of document identifier hashes if the documents.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Deprecated
-  public void documentDeleteMultiple(String[] outputConnectionNames,
-    String[] identifierClasses, String[] identifierHashes,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Delete multiple documents from the search engine index.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is tha array of document identifier hashes if the documents.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Deprecated
-  public void documentDeleteMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Delete a document from the search engine index.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Deprecated
-  public void documentDelete(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Delete multiple documents from the search engine index.
   *@param pipelineSpecificationBasics are the pipeline specifications associated with the documents.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
   *@param identifierHashes is tha array of document identifier hashes if the documents.
@@ -445,41 +257,6 @@ public interface IIncrementalIngester
     IOutputRemoveActivity activities)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Look up ingestion data for a SET of documents.
-  *@param outputConnectionNames are the names of the output connections associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the array of document identifier hashes to look up.
-  *@return the array of document data.  Null will come back for any identifier that doesn't
-  * exist in the index.
-  */
-  @Deprecated
-  public DocumentIngestStatus[] getDocumentIngestDataMultiple(String[] outputConnectionNames,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException;
-
-  /** Look up ingestion data for a SET of documents.
-  *@param outputConnectionName is the names of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the array of document identifier hashes to look up.
-  *@return the array of document data.  Null will come back for any identifier that doesn't
-  * exist in the index.
-  */
-  @Deprecated
-  public DocumentIngestStatus[] getDocumentIngestDataMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException;
-
-  /** Look up ingestion data for a documents.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@return the current document's ingestion data, or null if the document is not currently ingested.
-  */
-  @Deprecated
-  public DocumentIngestStatus getDocumentIngestData(String outputConnectionName,
-    String identifierClass, String identifierHash)
-    throws ManifoldCFException;
-
   /** Look up ingestion data for a set of documents.
   *@param rval is a map of output key to document data, in no particular order, which will be loaded with all matching results.
   *@param pipelineSpecificationBasics are the pipeline specifications corresponding to the identifier classes and hashes.
@@ -518,30 +295,6 @@ public interface IIncrementalIngester
 
   /** Calculate the average time interval between changes for a document.
   * This is based on the data gathered for the document.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the hashes of the ids of the documents.
-  *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
-  */
-  @Deprecated
-  public long[] getDocumentUpdateIntervalMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException;
-
-  /** Calculate the average time interval between changes for a document.
-  * This is based on the data gathered for the document.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
-  */
-  @Deprecated
-  public long getDocumentUpdateInterval(String outputConnectionName,
-    String identifierClass, String identifierHash)
-    throws ManifoldCFException;
-
-  /** Calculate the average time interval between changes for a document.
-  * This is based on the data gathered for the document.
   *@param pipelineSpecificationBasic is the basic pipeline specification.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
   *@param identifierHashes is the hashes of the ids of the documents.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java Sat Jul  5 18:53:01 2014
@@ -43,7 +43,7 @@ public interface IPipelineConnector exte
   /** Document permanently rejected */
   public final static int DOCUMENTSTATUS_REJECTED = 1;
 
-  /** Get a pipeline version string, given a pipeline specification object.  The version string is used to
+  /** Get a pipeline version object, given a pipeline specification object.  The version string is used to
   * uniquely describe the pertinent details of the specification and the configuration, to allow the Connector 
   * Framework to determine whether a document will need to be processed again.
   * Note that the contents of any document cannot be considered by this method; only configuration and specification information
@@ -51,10 +51,10 @@ public interface IPipelineConnector exte
   *
   * This method presumes that the underlying connector object has been configured.
   *@param spec is the current pipeline specification object for this connection for the job that is doing the crawling.
-  *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
+  *@return a version object, including a string of unlimited length, which uniquely describes configuration and specification in such a way that
   * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
   */
-  public String getPipelineDescription(Specification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
@@ -64,7 +64,7 @@ public interface IPipelineConnector exte
   *@param checkActivity is an object including the activities that can be performed by this method.
   *@return true if the mime type can be accepted by this connector.
   */
-  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Pre-determine whether a document (passed here as a File object) is acceptable or not.  This method is
@@ -75,7 +75,7 @@ public interface IPipelineConnector exte
   *@param checkActivity is an object including the activities that can be done by this method.
   *@return true if the file is acceptable, false if not.
   */
-  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Pre-determine whether a document's length is acceptable.  This method is used
@@ -85,7 +85,7 @@ public interface IPipelineConnector exte
   *@param checkActivity is an object including the activities that can be done by this method.
   *@return true if the file is acceptable, false if not.
   */
-  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Pre-determine whether a document's URL is acceptable.  This method is used
@@ -95,7 +95,7 @@ public interface IPipelineConnector exte
   *@param checkActivity is an object including the activities that can be done by this method.
   *@return true if the file is acceptable, false if not.
   */
-  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+  public boolean checkURLIndexable(VersionContext pipelineDescription, String url, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -114,7 +114,7 @@ public interface IPipelineConnector exte
   *@return the document status (accepted or permanently rejected).
   *@throws IOException only if there's a stream error reading the document data.
   */
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException;
 
   // UI support methods.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java Sat Jul  5 18:53:01 2014
@@ -36,6 +36,6 @@ public interface IPipelineSpecification
   *@param stage is the stage to get the connection name for.
   *@return the description string that stage.
   */
-  public String getStageDescriptionString(int stage);
+  public VersionContext getStageDescriptionString(int stage);
   
 }

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java Sat Jul  5 18:53:01 2014
@@ -89,10 +89,10 @@ public abstract class BaseOutputConnecto
   *@return true if the mime type can be accepted by this connector.
   */
   @Override
-  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkMimeTypeIndexable(pipelineDescription, mimeType);
+    return checkMimeTypeIndexable(pipelineDescription.getVersionString(), mimeType);
   }
 
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
@@ -127,10 +127,10 @@ public abstract class BaseOutputConnecto
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkDocumentIndexable(pipelineDescription, localFile);
+    return checkDocumentIndexable(pipelineDescription.getVersionString(), localFile);
   }
 
   /** Pre-determine whether a document (passed here as a File object) is indexable by this connector.  This method is used by participating
@@ -166,10 +166,10 @@ public abstract class BaseOutputConnecto
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkLengthIndexable(pipelineDescription, length);
+    return checkLengthIndexable(pipelineDescription.getVersionString(), length);
   }
 
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
@@ -192,10 +192,10 @@ public abstract class BaseOutputConnecto
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+  public boolean checkURLIndexable(VersionContext pipelineDescription, String url, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkURLIndexable(pipelineDescription, url);
+    return checkURLIndexable(pipelineDescription.getVersionString(), url);
   }
 
   /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
@@ -222,10 +222,10 @@ public abstract class BaseOutputConnecto
   * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
   */
   @Override
-  public String getPipelineDescription(Specification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
-    return getOutputDescription((OutputSpecification)spec);
+    return new VersionContext(getOutputDescription((OutputSpecification)spec),params,spec);
   }
 
   /** Get an output version string, given an output specification.  The output version string is used to uniquely describe the pertinent details of
@@ -263,10 +263,10 @@ public abstract class BaseOutputConnecto
   *@throws IOException only if there's a stream error reading the document data.
   */
   @Override
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
   {
-    return addOrReplaceDocument(documentURI, pipelineDescription, document, authorityNameString, activities);
+    return addOrReplaceDocument(documentURI, pipelineDescription.getVersionString(), document, authorityNameString, activities);
   }
 
   /** Add (or replace) a document in the output data store using the connector.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java Sat Jul  5 18:53:01 2014
@@ -78,7 +78,7 @@ public abstract class BaseTransformation
   *@return true if the mime type can be accepted by this connector.
   */
   @Override
-  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkMimeTypeIndexable(mimeType);
@@ -93,7 +93,7 @@ public abstract class BaseTransformation
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkDocumentIndexable(localFile);
@@ -107,7 +107,7 @@ public abstract class BaseTransformation
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkLengthIndexable(length);
@@ -121,7 +121,7 @@ public abstract class BaseTransformation
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+  public boolean checkURLIndexable(VersionContext pipelineDescription, String url, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkURLIndexable(url);
@@ -139,10 +139,10 @@ public abstract class BaseTransformation
   * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
   */
   @Override
-  public String getPipelineDescription(Specification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
-    return "";
+    return new VersionContext("",params,spec);
   }
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -162,7 +162,7 @@ public abstract class BaseTransformation
   *@throws IOException only if there's a stream error reading the document data.
   */
   @Override
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
   {
     return DOCUMENTSTATUS_REJECTED;

Added: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java?rev=1608109&view=auto
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java (added)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java Sat Jul  5 18:53:01 2014
@@ -0,0 +1,67 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.interfaces;
+
+/** An instance of this class represents a version string, in combination with the configuration parameters
+* and specification that produced it.  Some clients will use the version string (e.g. the database), while others
+* may find it more convenient to use the parameters or the specification.  However:
+* (1) It is ALWAYS wrong to use data from configuration or specification that is NOT represented in some
+*     way in the version string, either by exact representation, or by some proxy value;
+* (2) Configuration and Specification are guaranteed to be the identical ones which were used during creation
+*     of the version string;
+* (3) Configuration and Specification are provided as CONVENIENCES; they are not to be considered primary
+*    data for these objects.
+*/
+public class VersionContext
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  // Member variables
+  protected final String versionString;
+  protected final ConfigParams params;
+  protected final Specification specification;
+
+  /** Constructor.
+  */
+  public VersionContext(String versionString, ConfigParams params, Specification specification)
+  {
+    this.versionString = versionString;
+    this.params = params;
+    this.specification = specification;
+  }
+
+  /** Retrieve the version String */
+  public String getVersionString()
+  {
+    return versionString;
+  }
+  
+  /** Retrieve the configuration parameters */
+  public ConfigParams getParams()
+  {
+    return params;
+  }
+  
+  /** Retrieve the specification */
+  public Specification getSpecification()
+  {
+    return specification;
+  }
+  
+}

Propchange: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java Sat Jul  5 18:53:01 2014
@@ -150,7 +150,7 @@ public abstract class BaseRepositoryConn
   *@return an updated seeding version string, to be stored with the job.
   */
   @Override
-  public String addSeedDocumentsWithVersion(ISeedingActivity activities, Specification spec,
+  public String addSeedDocuments(ISeedingActivity activities, Specification spec,
     String lastSeedVersion, long seedTime, int jobMode)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -325,6 +325,43 @@ public abstract class BaseRepositoryConn
   }
 
   /** Get document versions given an array of document identifiers.
+  * This method is called for EVERY document that is considered. It is therefore important to perform
+  * as little work as possible here.
+  * The connector will be connected before this method can be called.
+  *@param documentVersions is the versions object, to be filled in by this method.
+  *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
+  *@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
+  *   A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
+  *   had an empty version string.
+  *@param activities is the interface this method should use to perform whatever framework actions are desired.
+  *@param spec is the current document specification for the current job.  If there is a dependency on this
+  * specification, then the version string should include the pertinent data, so that reingestion will occur
+  * when the specification changes.  This is primarily useful for metadata.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
+  */
+  @Override
+  public void getDocumentVersions(
+    DocumentVersions documentVersions,
+    String[] documentIdentifiers, String[] oldVersions,
+    IVersionActivity activities,
+    Specification spec, int jobMode, boolean usesDefaultAuthority)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    String[] rval = getDocumentVersions(documentIdentifiers,oldVersions,activities,
+      spec,jobMode,usesDefaultAuthority);
+    for (int i = 0; i < rval.length; i++)
+    {
+      if (rval[i] != null)
+      {
+        documentVersions.setDocumentVersion(documentIdentifiers[i],new VersionContext(rval[i],params,spec));
+        if (rval[i].length() == 0)
+          documentVersions.alwaysRefetch(documentIdentifiers[i]);
+      }
+    }
+  }
+
+  /** Get document versions given an array of document identifiers.
   * This method is called for EVERY document that is considered. It is
   * therefore important to perform as little work as possible here.
   *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
@@ -341,7 +378,6 @@ public abstract class BaseRepositoryConn
   * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
   * will always be processed.
   */
-  @Override
   public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
     Specification spec, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption
@@ -448,10 +484,36 @@ public abstract class BaseRepositoryConn
   * the getDocumentVersions() method, including those that returned null versions.  It may be used to free resources
   * committed during the getDocumentVersions() method.  It is guaranteed to be called AFTER any calls to
   * processDocuments() for the documents in question.
+  * The connector will be connected before this method can be called.
   *@param documentIdentifiers is the set of document identifiers.
-  *@param versions is the corresponding set of version identifiers (individual identifiers may be null).
+  *@param versions is the corresponding set of version strings (individual identifiers may have no version).
   */
   @Override
+  public void releaseDocumentVersions(String[] documentIdentifiers, DocumentVersions versions)
+    throws ManifoldCFException
+  {
+    String[] versionStrings = new String[documentIdentifiers.length];
+    for (int i = 0; i < versionStrings.length; i++)
+    {
+      VersionContext vc = versions.getDocumentVersion(documentIdentifiers[i]);
+      boolean alwaysFetch = versions.isAlwaysRefetch(documentIdentifiers[i]);
+      if (alwaysFetch)
+        versionStrings[i] = "";
+      else if (vc == null)
+        versionStrings[i] = null;
+      else
+        versionStrings[i] = vc.getVersionString();
+    }
+    releaseDocumentVersions(documentIdentifiers,versionStrings);
+  }
+
+  /** Free a set of documents.  This method is called for all documents whose versions have been fetched using
+  * the getDocumentVersions() method, including those that returned null versions.  It may be used to free resources
+  * committed during the getDocumentVersions() method.  It is guaranteed to be called AFTER any calls to
+  * processDocuments() for the documents in question.
+  *@param documentIdentifiers is the set of document identifiers.
+  *@param versions is the corresponding set of version identifiers (individual identifiers may be null).
+  */
   public void releaseDocumentVersions(String[] documentIdentifiers, String[] versions)
     throws ManifoldCFException
   {
@@ -472,6 +534,42 @@ public abstract class BaseRepositoryConn
   * This is the method that should cause each document to be fetched, processed, and the results either added
   * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
   * The document specification allows this class to filter what is done based on the job.
+  * The connector will be connected before this method can be called.
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param versions are the version strings returned by getDocumentVersions() above.
+  *@param activities is the interface this method should use to queue up new document references
+  * and ingest documents.
+  *@param scanOnly is an array corresponding to the document identifiers.  It is set to true to indicate when the processing
+  * should only find other references, and should not actually call the ingestion methods.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  */
+  @Override
+  public void processDocuments(String[] documentIdentifiers, DocumentVersions versions, IProcessActivity activities,
+    boolean[] scanOnly, int jobMode)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    Specification spec = null;
+    String[] versionStrings = new String[documentIdentifiers.length];
+    for (int i = 0; i < versionStrings.length; i++)
+    {
+      VersionContext vc = versions.getDocumentVersion(documentIdentifiers[i]);
+      if (vc != null)
+        spec = vc.getSpecification();
+      boolean alwaysFetch = versions.isAlwaysRefetch(documentIdentifiers[i]);
+      if (alwaysFetch)
+        versionStrings[i] = "";
+      else if (vc == null)
+        versionStrings[i] = null;
+      else
+        versionStrings[i] = vc.getVersionString();
+    }
+    processDocuments(documentIdentifiers,versionStrings,activities,spec,scanOnly,jobMode);
+  }
+
+  /** Process a set of documents.
+  * This is the method that should cause each document to be fetched, processed, and the results either added
+  * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
+  * The document specification allows this class to filter what is done based on the job.
   *@param documentIdentifiers is the set of document identifiers to process.
   *@param versions is the corresponding document versions to process, as returned by getDocumentVersions() above.
   *       The implementation may choose to ignore this parameter and always process the current version.
@@ -482,7 +580,6 @@ public abstract class BaseRepositoryConn
   * should only find other references, and should not actually call the ingestion methods.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   */
-  @Override
   public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities,
     Specification spec, boolean[] scanOnly, int jobMode)
     throws ManifoldCFException, ServiceInterruption

Added: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java?rev=1608109&view=auto
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java (added)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java Sat Jul  5 18:53:01 2014
@@ -0,0 +1,74 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** This class represents a set of document versions, organized by document identifier.
+* It's part of the IRepositoryConnector API.
+*/
+public class DocumentVersions
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  protected final Map<String,VersionContext> documentVersions = new HashMap<String,VersionContext>();
+  protected final Set<String> alwaysRefetch = new HashSet<String>();
+  
+  /** Constructor */
+  public DocumentVersions()
+  {
+  }
+  
+  /** Set a non-special document version.
+  *@param documentIdentifier is the document identifier.
+  *@param documentVersion is the document version.
+  */
+  public void setDocumentVersion(String documentIdentifier, VersionContext documentVersion)
+  {
+    documentVersions.put(documentIdentifier,documentVersion);
+  }
+  
+  /** Signal to always refetch document.
+  *@param documentIdentifier is the document identifier.
+  */
+  public void alwaysRefetch(String documentIdentifier)
+  {
+    alwaysRefetch.add(documentIdentifier);
+  }
+  
+  /** Get the document version, if any.
+  *@param documentIdentifier is the document identifier.
+  *@return the document version, if any.  Null indicates that no such document was found.
+  */
+  public VersionContext getDocumentVersion(String documentIdentifier)
+  {
+    return documentVersions.get(documentIdentifier);
+  }
+  
+  /** Check whether we should always refetch a specified document.
+  *@param documentIdentifier is the document identifier.
+  *@return true if we are directed to always refetch.  False will be returned by default.
+  */
+  public boolean isAlwaysRefetch(String documentIdentifier)
+  {
+    return alwaysRefetch.contains(documentIdentifier);
+  }
+  
+}

Propchange: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java Sat Jul  5 18:53:01 2014
@@ -178,7 +178,7 @@ public interface IRepositoryConnector ex
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   *@return an updated seeding version string, to be stored with the job.
   */
-  public String addSeedDocumentsWithVersion(ISeedingActivity activities, Specification spec,
+  public String addSeedDocuments(ISeedingActivity activities, Specification spec,
     String lastSeedVersion, long seedTime, int jobMode)
     throws ManifoldCFException, ServiceInterruption;
 
@@ -186,6 +186,7 @@ public interface IRepositoryConnector ex
   * This method is called for EVERY document that is considered. It is therefore important to perform
   * as little work as possible here.
   * The connector will be connected before this method can be called.
+  *@param documentVersions is the versions object, to be filled in by this method.
   *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
   *@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
   *   A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
@@ -196,11 +197,11 @@ public interface IRepositoryConnector ex
   * when the specification changes.  This is primarily useful for metadata.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
-  *@return the corresponding version strings, with null in the places where the document no longer exists.
-  * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
-  * will always be processed.
   */
-  public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
+  public void getDocumentVersions(
+    DocumentVersions documentVersions,
+    String[] documentIdentifiers, String[] oldVersions,
+    IVersionActivity activities,
     Specification spec, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption;
 
@@ -210,17 +211,15 @@ public interface IRepositoryConnector ex
   * The document specification allows this class to filter what is done based on the job.
   * The connector will be connected before this method can be called.
   *@param documentIdentifiers is the set of document identifiers to process.
-  *@param versions is the corresponding document versions to process, as returned by getDocumentVersions() above.
-  *       The implementation may choose to ignore this parameter and always process the current version.
+  *@param versions are the version strings returned by getDocumentVersions() above.
   *@param activities is the interface this method should use to queue up new document references
   * and ingest documents.
-  *@param spec is the document specification.
   *@param scanOnly is an array corresponding to the document identifiers.  It is set to true to indicate when the processing
   * should only find other references, and should not actually call the ingestion methods.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   */
-  public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities,
-    Specification spec, boolean[] scanOnly, int jobMode)
+  public void processDocuments(String[] documentIdentifiers, DocumentVersions versions, IProcessActivity activities,
+    boolean[] scanOnly, int jobMode)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Free a set of documents.  This method is called for all documents whose versions have been fetched using
@@ -229,9 +228,9 @@ public interface IRepositoryConnector ex
   * processDocuments() for the documents in question.
   * The connector will be connected before this method can be called.
   *@param documentIdentifiers is the set of document identifiers.
-  *@param versions is the corresponding set of version identifiers (individual identifiers may be null).
+  *@param versions is the corresponding set of version strings (individual identifiers may have no version).
   */
-  public void releaseDocumentVersions(String[] documentIdentifiers, String[] versions)
+  public void releaseDocumentVersions(String[] documentIdentifiers, DocumentVersions versions)
     throws ManifoldCFException;
 
   /** Get the maximum number of documents to amalgamate together into one batch, for this connector.

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java Sat Jul  5 18:53:01 2014
@@ -27,17 +27,17 @@ import org.apache.manifoldcf.crawler.int
 public class PipelineSpecification implements IPipelineSpecification
 {
   protected final IPipelineSpecificationBasic basicSpecification;
-  protected final String[] pipelineDescriptionStrings;
+  protected final VersionContext[] pipelineDescriptionStrings;
     
   public PipelineSpecification(IPipelineSpecificationBasic basicSpecification, IJobDescription job, IIncrementalIngester ingester)
     throws ManifoldCFException, ServiceInterruption
   {
     this.basicSpecification = basicSpecification;
-    this.pipelineDescriptionStrings = new String[basicSpecification.getStageCount()];
+    this.pipelineDescriptionStrings = new VersionContext[basicSpecification.getStageCount()];
     for (int i = 0; i < pipelineDescriptionStrings.length; i++)
     {
       // Note: this needs to change when output connections become part of the pipeline
-      String descriptionString;
+      VersionContext descriptionString;
       if (basicSpecification.checkStageOutputConnection(i))
       {
         descriptionString = ingester.getOutputDescription(basicSpecification.getStageConnectionName(i),job.getPipelineStageSpecification(i));
@@ -64,7 +64,7 @@ public class PipelineSpecification imple
   *@return the description string that stage.
   */
   @Override
-  public String getStageDescriptionString(int stage)
+  public VersionContext getStageDescriptionString(int stage)
   {
     return pipelineDescriptionStrings[stage];
   }

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java Sat Jul  5 18:53:01 2014
@@ -147,7 +147,7 @@ public class SeedingThread extends Threa
                     if (Logging.threads.isDebugEnabled())
                       Logging.threads.debug("Seeding thread: Getting seeds for job "+jobID.toString());
 
-                    newSeedingVersion = connector.addSeedDocumentsWithVersion(activity,jobDescription.getSpecification(),lastSeedingVersion,currentTime,jobType);
+                    newSeedingVersion = connector.addSeedDocuments(activity,jobDescription.getSpecification(),lastSeedingVersion,currentTime,jobType);
 
                     activity.doneSeeding(model==connector.MODEL_PARTIAL);
 

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java Sat Jul  5 18:53:01 2014
@@ -150,7 +150,7 @@ public class StartupThread extends Threa
                     if (Logging.threads.isDebugEnabled())
                       Logging.threads.debug("Adding initial seed documents for job "+jobID.toString()+"...");
                     // Get the initial seed documents, and make sure those are added
-                    newSeedingVersion = connector.addSeedDocumentsWithVersion(activity,jobDescription.getSpecification(),lastSeedingVersion,currentTime,jobType);
+                    newSeedingVersion = connector.addSeedDocuments(activity,jobDescription.getSpecification(),lastSeedingVersion,currentTime,jobType);
                     // Flush anything left
                     activity.doneSeeding(model==connector.MODEL_PARTIAL);
                     if (Logging.threads.isDebugEnabled())

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Sat Jul  5 18:53:01 2014
@@ -81,7 +81,6 @@ public class WorkerThread extends Thread
       IRepositoryConnectorPool repositoryConnectorPool = RepositoryConnectorPoolFactory.make(threadContext);
       
       List<DocumentToProcess> fetchList = new ArrayList<DocumentToProcess>();
-      Map<String,String> versionMap = new HashMap<String,String>();
       List<QueuedDocument> finishList = new ArrayList<QueuedDocument>();
       Map<String,Integer> idHashIndexMap = new HashMap<String,Integer>();
 
@@ -175,7 +174,6 @@ public class WorkerThread extends Thread
             // Clear out all of our disposition lists
             fetchList.clear();
             finishList.clear();
-            versionMap.clear();
             deleteList.clear();
             ingesterCheckList.clear();
             hopcountremoveList.clear();
@@ -334,12 +332,14 @@ public class WorkerThread extends Thread
 
                     // === Fetch documents ===
                     // We start by getting the document version string.
-                    String[] newVersionStringArray = null;
+                    DocumentVersions documentVersions = new DocumentVersions();
+                    boolean successfulVersions = false;
                     try
                     {
-                      newVersionStringArray = connector.getDocumentVersions(currentDocIDArray,oldVersionStringArray,
+                      connector.getDocumentVersions(documentVersions,currentDocIDArray,oldVersionStringArray,
                         versionActivity,spec,jobType,isDefaultAuthority);
-
+                      successfulVersions = true;
+                      
                       if (Logging.threads.isDebugEnabled())
                         Logging.threads.debug("Worker thread done getting versions for "+Integer.toString(currentDocIDArray.length)+" documents");
 
@@ -398,7 +398,7 @@ public class WorkerThread extends Thread
                     }
 
                     // If version fetch was successful, the go on to processing phase
-                    if (newVersionStringArray != null)
+                    if (successfulVersions)
                     {
                       // This try{ } is for releasing document versions at the connector level.
                       try
@@ -429,10 +429,9 @@ public class WorkerThread extends Thread
                             // We call the incremental ingester to make the decision for us as to whether we refetch a document or not.
                             
                             String documentIDHash = dd.getDocumentIdentifierHash();
-                            String newDocVersion = newVersionStringArray[i];
-                            versionMap.put(documentIDHash,newDocVersion);
+                            VersionContext newDocContext = documentVersions.getDocumentVersion(dd.getDocumentIdentifier());
 
-                            if (newDocVersion == null)
+                            if (newDocContext == null)
                             {
                               deleteList.add(qd);
                             }
@@ -444,7 +443,7 @@ public class WorkerThread extends Thread
                               // See if we need to add, or update.
                               IPipelineSpecificationWithVersions specWithVersions = new PipelineSpecificationWithVersions(pipelineSpecification,qd);
                               boolean allowIngest = ingester.checkFetchDocument(specWithVersions,
-                                newDocVersion,
+                                newDocContext.getVersionString(),
                                 newParameterVersion,
                                 aclAuthority);
 
@@ -507,7 +506,6 @@ public class WorkerThread extends Thread
                             // Build a list of id's and flags
                             String[] processIDs = new String[fetchList.size()];
                             String[] processIDHashes = new String[fetchList.size()];
-                            String[] versions = new String[fetchList.size()];
                             boolean[] scanOnly = new boolean[fetchList.size()];
 
                             for (int i = 0; i < fetchList.size(); i++)
@@ -516,7 +514,6 @@ public class WorkerThread extends Thread
                               DocumentDescription dd = dToP.getDocument().getDocumentDescription();
                               processIDs[i] = dd.getDocumentIdentifier();
                               processIDHashes[i] = dd.getDocumentIdentifierHash();
-                              versions[i] = versionMap.get(dd.getDocumentIdentifierHash());
                               scanOnly[i] = dToP.getScanOnly();
                             }
 
@@ -530,7 +527,7 @@ public class WorkerThread extends Thread
                             try
                             {
 
-                              connector.processDocuments(processIDs,versions,activity,job.getSpecification(),scanOnly,jobType);
+                              connector.processDocuments(processIDs,documentVersions,activity,scanOnly,jobType);
 
                               // Flush remaining references into the database!
                               activity.flush();
@@ -780,7 +777,7 @@ public class WorkerThread extends Thread
                       finally
                       {
                         // Release any document temporary storage held by the connector
-                        connector.releaseDocumentVersions(currentDocIDArray,newVersionStringArray);
+                        connector.releaseDocumentVersions(currentDocIDArray,documentVersions);
                       }
                     
                     }



Mime
View raw message