manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1436727 - in /manifoldcf/branches/release-1.1-branch: ./ CHANGES.txt framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
Date Tue, 22 Jan 2013 02:30:26 GMT
Author: kwright
Date: Tue Jan 22 02:30:26 2013
New Revision: 1436727

URL: http://svn.apache.org/viewvc?rev=1436727&view=rev
Log:
Pull up fix for CONNECTORS-618 from trunk.

Modified:
    manifoldcf/branches/release-1.1-branch/   (props changed)
    manifoldcf/branches/release-1.1-branch/CHANGES.txt
    manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
    manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java

Propchange: manifoldcf/branches/release-1.1-branch/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1436686

Modified: manifoldcf/branches/release-1.1-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.1-branch/CHANGES.txt?rev=1436727&r1=1436726&r2=1436727&view=diff
==============================================================================
--- manifoldcf/branches/release-1.1-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-1.1-branch/CHANGES.txt Tue Jan 22 02:30:26 2013
@@ -3,6 +3,11 @@ $Id$
 
 ======================= Release 1.1 =====================
 
+CONNECTORS-618: MySQL orders indexes so that NULL values are first.
+This is a problem for the stuffer query, which then must go through
+millions of rows before it finds the one it is looking for.
+(Shigeki Kobayashi, Karl Wright)
+
 CONNECTORS-616: Work around Solr 4.0 or Jetty bug where connections
 are dropped randomly under multithreaded load.  Broken pipe exceptions
 are now retried after a minute, for up to three times, before the Solr

Modified: manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1436727&r1=1436726&r2=1436727&view=diff
==============================================================================
--- manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Tue Jan 22 02:30:26 2013
@@ -2132,7 +2132,7 @@ public class JobManager implements IJobM
     {
       IResultRow row = set.getRow(0);
       Double docPriority = (Double)row.getValue(jobQueue.docPriorityField);
-      if (docPriority != null)
+      if (docPriority != null && docPriority.doubleValue() < jobQueue.noDocPriorityValue)
         scanRecord.addBins(docPriority);
     }
     return rval;

Modified: manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1436727&r1=1436726&r2=1436727&view=diff
==============================================================================
--- manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/branches/release-1.1-branch/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Tue Jan 22 02:30:26 2013
@@ -110,6 +110,9 @@ public class JobQueue extends org.apache
   public static final String prioritySetField = "priorityset";
   public static final String checkActionField = "checkaction";
 
+  public static final double noDocPriorityValue = 1e9;
+  public static final Double nullDocPriority = new Double(noDocPriorityValue + 1.0);
+  
   protected static Map statusMap;
 
   static
@@ -198,7 +201,10 @@ public class JobQueue extends org.apache
       }
       else
       {
-        // Upgrade code goes here, if needed
+        // Upgrade; null docpriority fields bashed to 'infinity', so they don't slow down MySQL
+        Map map = new HashMap();
+        map.put(docPriorityField,nullDocPriority);
+        performUpdate(map,"WHERE "+docPriorityField+" IS NULL",null,null);
       }
 
       // Secondary table installation
@@ -688,7 +694,7 @@ public class JobQueue extends org.apache
   {
     HashMap map = new HashMap();
     map.put(prioritySetField,null);
-    map.put(docPriorityField,null);
+    map.put(docPriorityField,nullDocPriority);
     ArrayList list = new ArrayList();
     String query = buildConjunctionClause(list,new ClauseDescription[]{
       new UnitaryClause(jobIDField,jobID)});
@@ -715,7 +721,7 @@ public class JobQueue extends org.apache
       actionFieldValue = null;
       checkTimeValue = null;
       // Remove document priority; we don't want to pollute the queue.  See CONNECTORS-290.
-      map.put(docPriorityField,null);
+      map.put(docPriorityField,nullDocPriority);
       map.put(prioritySetField,null);
       break;
     case STATUS_ACTIVENEEDRESCAN:



Mime
View raw message