manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1640350 [2/2] - in /manifoldcf/branches/dev_1x: ./ framework/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/ framework/pull-agent/src/main...
Date Tue, 18 Nov 2014 14:30:38 GMT
Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingActivity.java?rev=1640350&r1=1640349&r2=1640350&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingActivity.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingActivity.java Tue Nov 18 14:30:38 2014
@@ -215,22 +215,24 @@ public class SeedingActivity implements 
     throws ManifoldCFException
   {
     // First, prioritize the documents using the queue tracker
-    long prioritizationTime = System.currentTimeMillis();
     IPriorityCalculator[] docPriorities = new IPriorityCalculator[docIDHashes.length];
 
-    int i = 0;
-    while (i < docIDHashes.length)
+    rt.clearPreloadRequests();
+
+    for (int i = 0 ; i < docIDHashes.length ; i++)
     {
       // Calculate desired document priority based on current queuetracker status.
       String[] bins = connector.getBinNames(docIDs[i]);
-      docPriorities[i] = new PriorityCalculator(rt,connection,bins);
-
-      i++;
+      PriorityCalculator p = new PriorityCalculator(rt,connection,bins);
+      docPriorities[i] = p;
+      p.makePreloadRequest();
     }
 
+    rt.preloadBinValues();
+
     jobManager.addDocumentsInitial(processID,
       jobID,legalLinkTypes,docIDHashes,docIDs,overrideSchedule,hopcountMethod,
-      prioritizationTime,docPriorities,prereqEventNames);
+      docPriorities,prereqEventNames);
 
   }
 

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SetPriorityThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SetPriorityThread.java?rev=1640350&r1=1640349&r2=1640350&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SetPriorityThread.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SetPriorityThread.java Tue Nov 18 14:30:38 2014
@@ -121,31 +121,31 @@ public class SetPriorityThread extends T
             }
 
             // Cycle through the current list of stuffer-identified documents until we come to the end.  Reprioritize these
-            // first.
+            // first.  NOTE: These documents will already have document priorities.
             DocumentDescription desc = blockingDocuments.getBlockingDocument();
             if (desc != null)
             {
               ManifoldCF.writeDocumentPriorities(threadContext,
-                new DocumentDescription[]{desc},connectionMap,jobDescriptionMap,currentTime);
+                new DocumentDescription[]{desc},connectionMap,jobDescriptionMap);
               processedCount++;
               continue;
             }
-            /* no longer useful given current architecture; only need to reprioritize blocking documents
+	    
             // Grab a list of document identifiers to set priority on.
             // We may well wind up calculating priority for documents that wind up having their
             // state changed before we can write back, but this is okay because update is only
             // going to be permitted for rows that still have the right state.
-            // I found that a limit of 1000 causes postgresql to basically do a linear scan, while a limit of 20 does not!
-            DocumentDescription[] descs = jobManager.getNextReprioritizationDocuments(currentTime,20);
+            DocumentDescription[] descs = jobManager.getNextNotYetProcessedReprioritizationDocuments(processID,1000);
             if (descs.length > 0)
             {
-              writePriorities(threadContext,mgr,jobManager,descs,connectionMap,jobDescriptionMap,currentTime);
+              ManifoldCF.writeDocumentPriorities(threadContext,
+                descs,connectionMap,jobDescriptionMap);
               processedCount += descs.length;
               continue;
             }
-            */
+
             Logging.threads.debug("Done reprioritizing because no more documents to reprioritize");
-            ManifoldCF.sleep(30000L);
+            ManifoldCF.sleep(5000L);
             break;
 
           }

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java?rev=1640350&r1=1640349&r2=1640350&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java Tue Nov 18 14:30:38 2014
@@ -138,8 +138,9 @@ public class StartupThread extends Threa
                   jobManager.prepareJobScan(jobID,legalLinkTypes,hopcountMethod,
                     model,jobType == IJobDescription.TYPE_CONTINUOUS,lastSeedingVersion == null,
                     requestMinimum);
-                  ManifoldCF.resetAllDocumentPriorities(threadContext,currentTime,processID);
                   
+                  ManifoldCF.resetAllDocumentPriorities(threadContext,processID);
+
                   if (Logging.threads.isDebugEnabled())
                     Logging.threads.debug("Prepared job "+jobID.toString()+" for execution.");
 

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1640350&r1=1640349&r2=1640350&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Tue Nov 18 14:30:38 2014
@@ -1965,7 +1965,7 @@ public class WorkerThread extends Thread
 
         jobManager.addDocuments(processID,
           jobID,legalLinkTypes,docidHashes,docids,db.getParentIdentifierHash(),db.getLinkType(),hopcountMode,
-          dataNames,dataValues,currentTime,priorities,eventNames);
+          dataNames,dataValues,priorities,eventNames);
         
         rt.clearPreloadedValues();
       }



Mime
View raw message