manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1621783 - in /manifoldcf/trunk: connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/ framework/pull-agent/src/main/java/o...
Date Mon, 01 Sep 2014 13:51:47 GMT
Author: kwright
Date: Mon Sep  1 13:51:47 2014
New Revision: 1621783

URL: http://svn.apache.org/r1621783
Log:
Remove IDocumentIdentifierStream. Part of CONNECTORS-977.

Removed:
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IDocumentIdentifierStream.java
Modified:
    manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java

Modified: manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java?rev=1621783&r1=1621782&r2=1621783&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java (original)
+++ manifoldcf/trunk/connectors/jcifs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharedrive/SharedDriveConnector.java Mon Sep  1 13:51:47 2014
@@ -60,7 +60,6 @@ import org.apache.manifoldcf.core.interf
 import org.apache.manifoldcf.core.interfaces.LockManagerFactory;
 import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity;
 import org.apache.manifoldcf.crawler.interfaces.DocumentSpecification;
-import org.apache.manifoldcf.crawler.interfaces.IDocumentIdentifierStream;
 import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;
 import org.apache.manifoldcf.crawler.interfaces.IFingerprintActivity;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java?rev=1621783&r1=1621782&r2=1621783&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java Mon Sep  1 13:51:47 2014
@@ -155,188 +155,8 @@ public abstract class BaseRepositoryConn
     throws ManifoldCFException, ServiceInterruption
   {
     return "";
-    /*
-    long startTime;
-    if (lastSeedVersion == null)
-      startTime = 0L;
-    else
-    {
-      // Unpack seed time from seed version string
-      startTime = new Long(lastSeedVersion).longValue();
-    }
-    addSeedDocuments(activities,spec,startTime,seedTime,jobMode);
-    return new Long(seedTime).toString();
-    */
-  }
-
-  /** Queue "seed" documents.  Seed documents are the starting places for crawling activity.  Documents
-  * are seeded when this method calls appropriate methods in the passed in ISeedingActivity object.
-  *
-  * This method can choose to find repository changes that happen only during the specified time interval.
-  * The seeds recorded by this method will be viewed by the framework based on what the
-  * getConnectorModel() method returns.
-  *
-  * It is not a big problem if the connector chooses to create more seeds than are
-  * strictly necessary; it is merely a question of overall work required.
-  *
-  * The times passed to this method may be interpreted for greatest efficiency.  The time ranges
-  * any given job uses with this connector will not overlap, but will proceed starting at 0 and going
-  * to the "current time", each time the job is run.  For continuous crawling jobs, this method will
-  * be called once, when the job starts, and at various periodic intervals as the job executes.
-  *
-  * When a job's specification is changed, the framework automatically resets the seeding start time to 0.  The
-  * seeding start time may also be set to 0 on each job run, depending on the connector model returned by
-  * getConnectorModel().
-  *
-  * Note that it is always ok to send MORE documents rather than less to this method.
-  *@param activities is the interface this method should use to perform whatever framework actions are desired.
-  *@param spec is a document specification (that comes from the job).
-  *@param startTime is the beginning of the time range to consider, inclusive.
-  *@param endTime is the end of the time range to consider, exclusive.
-  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
-  */
-  /*
-  public void addSeedDocuments(ISeedingActivity activities, Specification spec,
-    long startTime, long endTime, int jobMode)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    addSeedDocuments(activities,(DocumentSpecification)spec,startTime,endTime,jobMode);
   }
 
-  public void addSeedDocuments(ISeedingActivity activities, DocumentSpecification spec,
-    long startTime, long endTime, int jobMode)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    addSeedDocuments(activities,spec,startTime,endTime);
-  }
-  */
-  
-  /** Queue "seed" documents.  Seed documents are the starting places for crawling activity.  Documents
-  * are seeded when this method calls appropriate methods in the passed in ISeedingActivity object.
-  *
-  * This method can choose to find repository changes that happen only during the specified time interval.
-  * The seeds recorded by this method will be viewed by the framework based on what the
-  * getConnectorModel() method returns.
-  *
-  * It is not a big problem if the connector chooses to create more seeds than are
-  * strictly necessary; it is merely a question of overall work required.
-  *
-  * The times passed to this method may be interpreted for greatest efficiency.  The time ranges
-  * any given job uses with this connector will not overlap, but will proceed starting at 0 and going
-  * to the "current time", each time the job is run.  For continuous crawling jobs, this method will
-  * be called once, when the job starts, and at various periodic intervals as the job executes.
-  *
-  * When a job's specification is changed, the framework automatically resets the seeding start time to 0.  The
-  * seeding start time may also be set to 0 on each job run, depending on the connector model returned by
-  * getConnectorModel().
-  *
-  * Note that it is always ok to send MORE documents rather than less to this method.
-  *@param activities is the interface this method should use to perform whatever framework actions are desired.
-  *@param spec is a document specification (that comes from the job).
-  *@param startTime is the beginning of the time range to consider, inclusive.
-  *@param endTime is the end of the time range to consider, exclusive.
-  */
-  /*
-  public void addSeedDocuments(ISeedingActivity activities, DocumentSpecification spec,
-    long startTime, long endTime)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    // Call the old-style methods that get document identifiers, and then queue
-    // them using the new activities-based methods
-    IDocumentIdentifierStream ids = getDocumentIdentifiers(activities,spec,startTime,endTime);
-    if (ids != null)
-    {
-      try
-      {
-        while (true)
-        {
-          String id = ids.getNextIdentifier();
-          if (id == null) break;
-            activities.addSeedDocument(id);
-        }
-      }
-      finally
-      {
-        ids.close();
-      }
-    }
-    ids = getRemainingDocumentIdentifiers(activities,spec,startTime,endTime);
-    if (ids != null)
-    {
-      try
-      {
-        while (true)
-        {
-          String id = ids.getNextIdentifier();
-          if (id == null) break;
-            activities.addUnqueuedSeedDocument(id);
-        }
-      }
-      finally
-      {
-        ids.close();
-      }
-    }
-  }
-  */
-  
-  /** The long version of getDocumentIdentifiers.
-  *@param activities is the interface this method should use to perform whatever framework actions are desired.
-  *@param spec is a document specification (that comes from the job).
-  *@param startTime is the beginning of the time range to consider, inclusive.
-  *@param endTime is the end of the time range to consider, exclusive.
-  *@return the local document identifiers that should be added to the queue, as a stream.
-  */
-  /*
-  public IDocumentIdentifierStream getDocumentIdentifiers(ISeedingActivity activities, DocumentSpecification spec,
-    long startTime, long endTime)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    return getDocumentIdentifiers(spec,startTime,endTime);
-  }
-  */
-  
-  /** The short version of getDocumentIdentifiers.
-  *@param spec is a document specification (that comes from the job).
-  *@param startTime is the beginning of the time range to consider, inclusive.
-  *@param endTime is the end of the time range to consider, exclusive.
-  *@return the local document identifiers that should be added to the queue, as a stream.
-  */
-  /*
-  public IDocumentIdentifierStream getDocumentIdentifiers(DocumentSpecification spec,
-    long startTime, long endTime)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    // Something provided here so we can override either one.
-    return null;
-  }
-  */
-  
-  /** This method returns the document identifiers that should be considered part of the seeds, but do not need to be
-  * queued for processing at this time.  This method is used to keep the hopcount tables up to date.  It is
-  * allowed to return more identifiers than it strictly needs to, specifically identifiers that were also returned
-  * by the getDocumentIdentifiers() method above.  However, it must constrain the identifiers it returns by the document
-  * specification.
-  * This method is only required to do anything if the connector supports hopcount determination (which it should signal by
-  * having more than zero legal relationship types returned by the getRelationshipTypes() method.
-  *
-  *@param activities is the interface this method should use to perform whatever framework actions are desired.
-  *@param spec is a document specification (that comes from the job).
-  *@param startTime is the beginning of the time range that was passed to getDocumentIdentifiers().
-  *@param endTime is the end of the time range to passed to getDocumentIdentifiers().
-  *@return the local document identifiers that should be added to the queue, as a stream, or null, if none need to be
-  * returned.
-  */
-  /*
-  public IDocumentIdentifierStream getRemainingDocumentIdentifiers(ISeedingActivity activities, DocumentSpecification spec,
-    long startTime, long endTime)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    // Usually we don't need to worry about this.
-    return null;
-  }
-  */
-  
   /** Process a set of documents.
   * This is the method that should cause each document to be fetched, processed, and the results either added
   * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.



Mime
View raw message