manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1624465 - in /manifoldcf/branches/dev_1x: ./ CHANGES.txt framework/core/src/main/java/org/apache/manifoldcf/core/database/DBInterfacePostgreSQL.java framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
Date Fri, 12 Sep 2014 06:55:41 GMT
Author: kwright
Date: Fri Sep 12 06:55:40 2014
New Revision: 1624465

URL: http://svn.apache.org/r1624465
Log:
Pull up fix for CONNECTORS-1027 from trunk.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/CHANGES.txt
    manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/database/DBInterfacePostgreSQL.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-1027:r1624243-1624462
  Merged /manifoldcf/trunk:r1624464

Modified: manifoldcf/branches/dev_1x/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/CHANGES.txt?rev=1624465&r1=1624464&r2=1624465&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/CHANGES.txt (original)
+++ manifoldcf/branches/dev_1x/CHANGES.txt Fri Sep 12 06:55:40 2014
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 1.8-dev =====================
 
+CONNECTORS-1027: Improve some general and PostgreSQL queries
+for large crawling sets.
+(Paul Boichat, Karl Wright)
+
 CONNECTORS-1025: SharePoint connector should skip blocked files.
 (Radek Sklenicka, Karl Wright)
 

Modified: manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/database/DBInterfacePostgreSQL.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/database/DBInterfacePostgreSQL.java?rev=1624465&r1=1624464&r2=1624465&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/database/DBInterfacePostgreSQL.java
(original)
+++ manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/database/DBInterfacePostgreSQL.java
Fri Sep 12 06:55:40 2014
@@ -1059,6 +1059,66 @@ public class DBInterfacePostgreSQL exten
     return 25;
   }
 
+  /* Calculate the number of values a particular clause can have, given the values for all the other clauses.
+  * For example, if in the expression x AND y AND z, x has 2 values and z has 1, find out how many values y can legally have
+  * when using the buildConjunctionClause() method below.
+  */
+  @Override
+  public int findConjunctionClauseMax(ClauseDescription[] otherClauseDescriptions)
+  {
+    // This implementation uses "OR"
+    return getMaxOrClause();
+  }
+
+  /* Construct a conjunction clause, e.g. x AND y AND z, where there is expected to be an index (x,y,z,...), and where x, y, or z
+  * can have multiple distinct values. The proper implementation of this method differs from database to database, because some databases
+  * only permit index operations when there are OR's between clauses, such as x1 AND y1 AND z1 OR x2 AND y2 AND z2 ..., where others
+  * only recognize index operations when there are lists specified for each, such as x IN (x1,x2) AND y IN (y1,y2) AND z IN (z1,z2).
+  */
+  @Override
+  public String buildConjunctionClause(List outputParameters, ClauseDescription[] clauseDescriptions)
+  {
+    // This implementation uses "OR" instead of "IN ()" for multiple values, since this generates better plans in PostgreSQL 9.x.
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0 ; i < clauseDescriptions.length ; i++)
+    {
+      ClauseDescription cd = clauseDescriptions[i];
+      if (i > 0)
+        sb.append(" AND ");
+      String columnName = cd.getColumnName();
+      List values = cd.getValues();
+      String operation = cd.getOperation();
+      String joinColumn = cd.getJoinColumnName();
+      if (values != null)
+      {
+        if (values.size() > 1)
+        {
+          sb.append(" (");
+          for (int j = 0 ; j < values.size() ; j++)
+          {
+            if (j > 0)
+              sb.append(" OR ");
+            sb.append(columnName).append(operation).append("?");
+            outputParameters.add(values.get(j));
+          }
+          sb.append(")");
+        }
+        else
+        {
+          sb.append(columnName).append(operation).append("?");
+          outputParameters.add(values.get(0));
+        }
+      }
+      else if (joinColumn != null)
+      {
+        sb.append(columnName).append(operation).append(joinColumn);
+      }
+      else
+        sb.append(columnName).append(operation);
+    }
+    return sb.toString();
+  }
+
   /** For windowed report queries, e.g. maxActivity or maxBandwidth, obtain the maximum number of rows
   * that can reasonably be expected to complete in an acceptable time.
   *@return the maximum number of rows.

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1624465&r1=1624464&r2=1624465&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
(original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
Fri Sep 12 06:55:40 2014
@@ -8564,10 +8564,11 @@ public class JobManager implements IJobM
         StringBuilder sb = new StringBuilder("SELECT ");
         ArrayList list = new ArrayList();
             
-        sb.append(database.constructCountClause(JobQueue.docHashField)).append(" AS doccount")
-          .append(" FROM ").append(jobQueue.getTableName()).append(" t1");
+        sb.append(database.constructCountClause("t2.x")).append(" AS doccount")
+          .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append("
t1");
         addWhereClause(sb,list,whereClause,whereParams,false);
-        sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false));
+        sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false))
+          .append(") t2");
         IResultSet countResult = database.performQuery(sb.toString(),list,null,null);
         if (countResult.getRowCount() > 0 && ((Long)countResult.getRow(0).getValue("doccount")).longValue()
> maxCount)
         {
@@ -8757,10 +8758,11 @@ public class JobManager implements IJobM
       // Now, for each job, fire off a separate, limited, query for each count we care about
       sb = new StringBuilder("SELECT ");
       list.clear();
-      sb.append(database.constructCountClause(JobQueue.docHashField)).append(" AS doccount")
-        .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ");
+      sb.append(database.constructCountClause("t2.x")).append(" AS doccount")
+        .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append("
WHERE ");
       sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{new UnitaryClause(JobQueue.jobIDField,jobID)}));
-      sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false));
+      sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false))
+        .append(") t2");
       
       IResultSet totalSet = database.performQuery(sb.toString(),list,null,null);
       if (totalSet.getRowCount() > 0)
@@ -8780,12 +8782,13 @@ public class JobManager implements IJobM
           
       sb = new StringBuilder("SELECT ");
       list.clear();
-      sb.append(database.constructCountClause(JobQueue.docHashField)).append(" AS doccount")
-        .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ");
+      sb.append(database.constructCountClause("t2.x")).append(" AS doccount")
+        .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append("
WHERE ");
       sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{new UnitaryClause(JobQueue.jobIDField,jobID)}));
       sb.append(" AND ");
       sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{buildOutstandingClause()}));
-      sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false));
+      sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false))
+        .append(") t2");
       
       IResultSet outstandingSet = database.performQuery(sb.toString(),list,null,null);
       if (outstandingSet.getRowCount() > 0)
@@ -8805,12 +8808,13 @@ public class JobManager implements IJobM
 
       sb = new StringBuilder("SELECT ");
       list.clear();
-      sb.append(database.constructCountClause(JobQueue.docHashField)).append(" AS doccount")
-        .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ");
+      sb.append(database.constructCountClause("t2.x")).append(" AS doccount")
+        .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append("
WHERE ");
       sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{new UnitaryClause(JobQueue.jobIDField,jobID)}));
       sb.append(" AND ");
       sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{buildProcessedClause()}));
-      sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false));
+      sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false))
+        .append(") t2");
       
       IResultSet processedSet = database.performQuery(sb.toString(),list,null,null);
       if (processedSet.getRowCount() > 0)



Mime
View raw message