manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1477689 - in /manifoldcf/branches/release-1.2-branch: ./ CHANGES.txt framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
Date Tue, 30 Apr 2013 15:31:57 GMT
Author: kwright
Date: Tue Apr 30 15:31:56 2013
New Revision: 1477689

URL: http://svn.apache.org/r1477689
Log:
Pull up fix for CONNECTORS-685 from trunk.

Modified:
    manifoldcf/branches/release-1.2-branch/   (props changed)
    manifoldcf/branches/release-1.2-branch/CHANGES.txt
    manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
    manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java

Propchange: manifoldcf/branches/release-1.2-branch/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1477688

Modified: manifoldcf/branches/release-1.2-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.2-branch/CHANGES.txt?rev=1477689&r1=1477688&r2=1477689&view=diff
==============================================================================
--- manifoldcf/branches/release-1.2-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-1.2-branch/CHANGES.txt Tue Apr 30 15:31:56 2013
@@ -3,6 +3,10 @@ $Id$
 
 =======================  Release 1.2 =====================
 
+CONNECTORS-685: Handle the case when connector model is
+ADD_CHANGE_DELETE and you change configuration data.
+(Maciej Liżewski, Karl Wright)
+
 CONNECTORS-676: Include DropBox connector.
 (Andrew Janowczyk, Karl Wright)
 

Modified: manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1477689&r1=1477688&r2=1477689&view=diff
==============================================================================
--- manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Tue Apr 30 15:31:56 2013
@@ -5459,7 +5459,9 @@ public class JobManager implements IJobM
     // (1) If the connector has MODEL_ADD_CHANGE_DELETE, then
     // we let the connector run the show; there's no purge phase, and therefore the
     // documents are left in a COMPLETED state if they don't show up in the list
-    // of seeds that require the attention of the connector.
+    // of seeds that require the attention of the connector.  However, we do need to
+    // preload the queue with all the existing documents, if there was any change to the
+    // specification information (which will mean that fromBeginningOfTime is set).
     //
     // (2) If the connector has MODEL_ALL, then it's a full crawl no matter what, so
     // we do a full scan initialization.
@@ -5470,13 +5472,20 @@ public class JobManager implements IJobM
 
     // Complete connector model is told everything, so no delete phase.
     if (connectorModel == IRepositoryConnector.MODEL_ADD_CHANGE_DELETE)
+    {
+      if (fromBeginningOfTime)
+        jobQueue.queueAllExisting(jobID);
       return;
+    }
     
     // If the connector model is complete via chaining, then we just need to make
     // sure discovery works to queue the changes.
     if (connectorModel == IRepositoryConnector.MODEL_CHAINED_ADD_CHANGE_DELETE)
     {
-      jobQueue.preparePartialScan(jobID);
+      if (fromBeginningOfTime)
+        jobQueue.queueAllExisting(jobID);
+      else
+        jobQueue.preparePartialScan(jobID);
       return;
     }
     

Modified: manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1477689&r1=1477688&r2=1477689&view=diff
==============================================================================
--- manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/branches/release-1.2-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Tue Apr 30 15:31:56 2013
@@ -603,6 +603,31 @@ public class JobQueue extends org.apache
     TrackerClass.noteJobChange(jobID,"Prepare full scan");
   }
 
+  /** For ADD_CHANGE_DELETE jobs where the specifications have been changed,
+  * we must reconsider every existing document.  So reconsider them all.
+  *@param jobID is the job identifier.
+  */
+  public void queueAllExisting(Long jobID)
+    throws ManifoldCFException
+  {
+    // Map COMPLETE to PENDINGPURGATORY
+    HashMap map = new HashMap();
+    map.put(statusField,statusToString(STATUS_PENDINGPURGATORY));
+    // Do not reset priorities here!  They should all be blank at this point.
+    map.put(checkTimeField,new Long(0L));
+    map.put(checkActionField,actionToString(ACTION_RESCAN));
+    map.put(failTimeField,null);
+    map.put(failCountField,null);
+    ArrayList list = new ArrayList();
+    String query = buildConjunctionClause(list,new ClauseDescription[]{
+      new UnitaryClause(jobIDField,jobID),
+      new UnitaryClause(statusField,statusToString(STATUS_COMPLETE))});
+    performUpdate(map,"WHERE "+query,list,null);
+    noteModifications(0,1,0);
+    // Do an analyze, otherwise our plans are going to be crap right off the bat
+    unconditionallyAnalyzeTables();
+    }
+    
   /** Prepare for a "partial" job.  This is called ONLY when the job is inactive.
   *
   * This method maps all COMPLETE entries to UNCHANGED.  The purpose is to



Mime
View raw message