incubator-connectors-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r998635 - in /incubator/lcf/trunk/modules/framework: core/src/main/java/org/apache/acf/core/database/ core/src/main/java/org/apache/acf/core/interfaces/ pull-agent/src/main/java/org/apache/acf/crawler/repository/
Date Sun, 19 Sep 2010 12:02:06 GMT
Author: kwright
Date: Sun Sep 19 12:02:05 2010
New Revision: 998635

URL: http://svn.apache.org/viewvc?rev=998635&view=rev
Log:
Partial fix for CONNECTORS-110.  Move the logic for DISTINCT ON into the database driver,
and reorganize the queries to work with Derby, since its GROUP BY logic is quite different
from Postgresql's.  The Derby implementation of DISTINCT ON is not correct, but will at least
keep there from being exceptions.

Modified:
    incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/BaseTable.java
    incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceDerby.java
    incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceMySQL.java
    incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfacePostgreSQL.java
    incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/interfaces/IDBInterface.java
    incubator/lcf/trunk/modules/framework/pull-agent/src/main/java/org/apache/acf/crawler/repository/RepositoryHistoryManager.java

Modified: incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/BaseTable.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/BaseTable.java?rev=998635&r1=998634&r2=998635&view=diff
==============================================================================
--- incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/BaseTable.java
(original)
+++ incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/BaseTable.java
Sun Sep 19 12:02:05 2010
@@ -319,6 +319,21 @@ public class BaseTable
     return dbInterface.constructOffsetLimitClause(offset,limit);
   }
 
+  /** Construct a 'distinct on (x)' filter.
+  * This filter wraps a query and returns a new query whose results are similar to POSTGRESQL's
DISTINCT-ON feature.
+  * Specifically, for each combination of the specified distinct fields in the result, only
the first such row is included in the final
+  * result.
+  *@param baseQuery is the base query, which can either be tables and where clause, or can
be another SELECT in parens,
+  * e.g. "(SELECT ...) t3"
+  *@param distinctFields are the fields to consider to be distinct.
+  *@param otherFields are the rest of the fields to return, keyed by the AS name, value being
the column value, e.g. "value AS key"
+  *@return a revised query that performs the necessary DISTINCT ON operation.
+  */
+  public String constructDistinctOnClause(String baseQuery, String[] distinctFields, Map
otherFields)
+  {
+    return dbInterface.constructDistinctOnClause(baseQuery,distinctFields,otherFields);
+  }
+
   /** Quote a sql string.
   * This method quotes a sql string in the proper manner for the database in question.
   *@param string is the input string.

Modified: incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceDerby.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceDerby.java?rev=998635&r1=998634&r2=998635&view=diff
==============================================================================
--- incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceDerby.java
(original)
+++ incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceDerby.java
Sun Sep 19 12:02:05 2010
@@ -846,6 +846,35 @@ public class DBInterfaceDerby extends Da
     return sb.toString();
   }
 
+  /** Construct a 'distinct on (x)' filter.
+  * This filter wraps a query and returns a new query whose results are similar to POSTGRESQL's
DISTINCT-ON feature.
+  * Specifically, for each combination of the specified distinct fields in the result, only
the first such row is included in the final
+  * result.
+  *@param baseQuery is the base query, which can either be tables and where clause, or can
be another SELECT in parens,
+  * e.g. "(SELECT ...) t3"
+  *@param distinctFields are the fields to consider to be distinct.
+  *@param otherFields are the rest of the fields to return, keyed by the AS name, value being
the column value, e.g. "value AS key"
+  *@return a revised query that performs the necessary DISTINCT ON operation.
+  */
+  public String constructDistinctOnClause(String baseQuery, String[] distinctFields, Map
otherFields)
+  {
+    // Derby does not support this functionality.  I can find no way around it either.
+    StringBuffer sb = new StringBuffer("SELECT ");
+    boolean needComma = false;
+    Iterator iter = otherFields.keySet().iterator();
+    while (iter.hasNext())
+    {
+      String fieldName = (String)iter.next();
+      String columnValue = (String)otherFields.get(fieldName);
+      if (needComma)
+        sb.append(",");
+      needComma = true;
+      sb.append(columnValue).append(" AS ").append(fieldName);
+    }
+    sb.append(" FROM ").append(baseQuery);
+    return sb.toString();
+  }
+
   /** Quote a sql string.
   * This method quotes a sql string in the proper manner for the database in question.
   *@param string is the input string.

Modified: incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceMySQL.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceMySQL.java?rev=998635&r1=998634&r2=998635&view=diff
==============================================================================
--- incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceMySQL.java
(original)
+++ incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfaceMySQL.java
Sun Sep 19 12:02:05 2010
@@ -583,6 +583,36 @@ public class DBInterfaceMySQL extends Da
     return sb.toString();
   }
 
+  /** Construct a 'distinct on (x)' filter.
+  * This filter wraps a query and returns a new query whose results are similar to POSTGRESQL's
DISTINCT-ON feature.
+  * Specifically, for each combination of the specified distinct fields in the result, only
the first such row is included in the final
+  * result.
+  *@param baseQuery is the base query, which can either be tables and where clause, or can
be another SELECT in parens,
+  * e.g. "(SELECT ...) t3"
+  *@param distinctFields are the fields to consider to be distinct.
+  *@param otherFields are the rest of the fields to return, keyed by the AS name, value being
the column value, e.g. "value AS key"
+  *@return a revised query that performs the necessary DISTINCT ON operation.
+  */
+  public String constructDistinctOnClause(String baseQuery, String[] distinctFields, Map
otherFields)
+  {
+    // I don't know whether MySql supports this functionality or not.
+    // MHL
+    StringBuffer sb = new StringBuffer("SELECT ");
+    boolean needComma = false;
+    Iterator iter = otherFields.keySet().iterator();
+    while (iter.hasNext())
+    {
+      String fieldName = (String)iter.next();
+      String columnValue = (String)otherFields.get(fieldName);
+      if (needComma)
+        sb.append(",");
+      needComma = true;
+      sb.append(columnValue).append(" AS ").append(fieldName);
+    }
+    sb.append(" FROM ").append(baseQuery);
+    return sb.toString();
+  }
+
   /** Quote a sql string.
   * This method quotes a sql string in the proper manner for the database in question.
   *@param string is the input string.

Modified: incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfacePostgreSQL.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfacePostgreSQL.java?rev=998635&r1=998634&r2=998635&view=diff
==============================================================================
--- incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfacePostgreSQL.java
(original)
+++ incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/database/DBInterfacePostgreSQL.java
Sun Sep 19 12:02:05 2010
@@ -865,6 +865,42 @@ public class DBInterfacePostgreSQL exten
     return sb.toString();
   }
 
+  /** Construct a 'distinct on (x)' filter.
+  * This filter wraps a query and returns a new query whose results are similar to POSTGRESQL's
DISTINCT-ON feature.
+  * Specifically, for each combination of the specified distinct fields in the result, only
the first such row is included in the final
+  * result.
+  *@param baseQuery is the base query, which can either be tables and where clause, or can
be another SELECT in parens,
+  * e.g. "(SELECT ...) t3"
+  *@param distinctFields are the fields to consider to be distinct.
+  *@param otherFields are the rest of the fields to return, keyed by the AS name, value being
the column value, e.g. "value AS key"
+  *@return a revised query that performs the necessary DISTINCT ON operation.
+  */
+  public String constructDistinctOnClause(String baseQuery, String[] distinctFields, Map
otherFields)
+  {
+    StringBuffer sb = new StringBuffer("SELECT DISTINCT ON(");
+    int i = 0;
+    while (i < distinctFields.length)
+    {
+      if (i > 0)
+        sb.append(",");
+      sb.append(distinctFields[i++]);
+    }
+    sb.append(" ");
+    Iterator iter = otherFields.keySet().iterator();
+    boolean needComma = false;
+    while (iter.hasNext())
+    {
+      String fieldName = (String)iter.next();
+      String columnValue = (String)otherFields.get(fieldName);
+      if (needComma)
+        sb.append(",");
+      needComma = true;
+      sb.append(columnValue).append(" AS ").append(fieldName);
+    }
+    sb.append(" FROM ").append(baseQuery);
+    return sb.toString();
+  }
+
   /** Quote a sql string.
   * This method quotes a sql string in the proper manner for the database in question.
   *@param string is the input string.

Modified: incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/interfaces/IDBInterface.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/interfaces/IDBInterface.java?rev=998635&r1=998634&r2=998635&view=diff
==============================================================================
--- incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/interfaces/IDBInterface.java
(original)
+++ incubator/lcf/trunk/modules/framework/core/src/main/java/org/apache/acf/core/interfaces/IDBInterface.java
Sun Sep 19 12:02:05 2010
@@ -283,6 +283,18 @@ public interface IDBInterface
   */
   public String constructOffsetLimitClause(int offset, int limit);
   
+  /** Construct a 'distinct on (x)' filter.
+  * This filter wraps a query and returns a new query whose results are similar to POSTGRESQL's
DISTINCT-ON feature.
+  * Specifically, for each combination of the specified distinct fields in the result, only
the first such row is included in the final
+  * result.
+  *@param baseQuery is the base query, which can either be tables and where clause, or can
be another SELECT in parens,
+  * e.g. "(SELECT ...) t3"
+  *@param distinctFields are the fields to consider to be distinct.
+  *@param otherFields are the rest of the fields to return, keyed by the AS name, value being
the column value, e.g. "value AS key"
+  *@return a revised query that performs the necessary DISTINCT ON operation.
+  */
+  public String constructDistinctOnClause(String baseQuery, String[] distinctFields, Map
otherFields);
+  
   /** Quote a sql string.
   * This method quotes a sql string in the proper manner for the database in question.
   *@param string is the input string.

Modified: incubator/lcf/trunk/modules/framework/pull-agent/src/main/java/org/apache/acf/crawler/repository/RepositoryHistoryManager.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/modules/framework/pull-agent/src/main/java/org/apache/acf/crawler/repository/RepositoryHistoryManager.java?rev=998635&r1=998634&r2=998635&view=diff
==============================================================================
--- incubator/lcf/trunk/modules/framework/pull-agent/src/main/java/org/apache/acf/crawler/repository/RepositoryHistoryManager.java
(original)
+++ incubator/lcf/trunk/modules/framework/pull-agent/src/main/java/org/apache/acf/crawler/repository/RepositoryHistoryManager.java
Sun Sep 19 12:02:05 2010
@@ -329,19 +329,24 @@ public class RepositoryHistoryManager ex
     // items to the list.  One is based on the start time of the current record; the other
is based on the
     // end time of the current record.  That's why there are two inner clauses with a UNION.
 
-    StringBuffer sb = new StringBuffer("SELECT * FROM (SELECT DISTINCT ON (idbucket) t3.bucket
AS idbucket, t3.activitycount AS activitycount, t3.windowstart AS starttime, t3.windowend
AS endtime FROM (SELECT * FROM (SELECT ");
+    StringBuffer sb = new StringBuffer();
+    sb.append("(SELECT * FROM (SELECT t6.bucket AS bucket,")
+      .append("t6.windowstart AS windowstart,t6.windowend AS windowend, SUM(t6.activitycount)
AS activitycount")
+      .append(" FROM (SELECT ");
 
     // Turn the interval into a string, since we'll need it a lot.
     String intervalString = new Long(interval).toString();
 
     sb.append("t0.bucket AS bucket, t0.").append(startTimeField).append(" AS windowstart,
t0.")
-      .append(startTimeField).append("+").append(intervalString).append(" AS windowend, SUM(CAST(((CASE
WHEN t0.")
+      .append(startTimeField).append("+").append(intervalString).append(" AS windowend, ")
+      .append("CAST(((CASE WHEN t0.")
       .append(startTimeField).append("+").append(intervalString).append("<t1.").append(endTimeField)
       .append(" THEN t0.").append(startTimeField).append("+").append(intervalString).append("
ELSE t1.")
       .append(endTimeField).append(" END) - (CASE WHEN t0.").append(startTimeField).append(">t1.").append(startTimeField)
       .append(" THEN t0.").append(startTimeField).append(" ELSE t1.").append(startTimeField)
       .append(" END)) AS DOUBLE PRECISION) / CAST((t1.").append(endTimeField).append("-t1.").append(startTimeField)
-      .append(") AS DOUBLE PRECISION)) AS activitycount FROM (SELECT DISTINCT ");
+      .append(") AS DOUBLE PRECISION)")
+      .append(" AS activitycount FROM (SELECT DISTINCT ");
     addBucketExtract(sb,"",entityIdentifierField,idBucket);
     sb.append(" AS bucket,").append(startTimeField).append(" FROM ").append(getTableName());
     addCriteria(sb,"",connectionName,filterCriteria,false);
@@ -352,9 +357,12 @@ public class RepositoryHistoryManager ex
     sb.append(" AND t1.").append(startTimeField).append("<t0.").append(startTimeField).append("+").append(intervalString)
       .append(" AND t1.").append(endTimeField).append(">t0.").append(startTimeField);
     addCriteria(sb,"t1.",connectionName,filterCriteria,true);
-    sb.append(" GROUP BY bucket,windowstart,windowend UNION SELECT ");
+    sb.append(") t6 GROUP BY bucket,windowstart,windowend UNION SELECT t6a.bucket AS bucket,")
+      .append("t6a.windowstart AS windowstart, t6a.windowend AS windowend, SUM(t6a.activitycount)
AS activitycount")
+      .append(" FROM (SELECT ");
     sb.append("t0a.bucket AS bucket, t0a.").append(endTimeField).append("-").append(intervalString).append("
AS windowstart, t0a.")
-      .append(endTimeField).append(" AS windowend, SUM(CAST(((CASE WHEN t0a.")
+      .append(endTimeField).append(" AS windowend, ")
+      .append("CAST(((CASE WHEN t0a.")
       .append(endTimeField).append("<t1a.").append(endTimeField)
       .append(" THEN t0a.").append(endTimeField).append(" ELSE t1a.")
       .append(endTimeField).append(" END) - (CASE WHEN t0a.").append(endTimeField).append("-").append(intervalString)
@@ -362,7 +370,8 @@ public class RepositoryHistoryManager ex
       .append(" THEN t0a.").append(endTimeField).append("-").append(intervalString).append("
ELSE t1a.")
       .append(startTimeField)
       .append(" END)) AS DOUBLE PRECISION) / CAST((t1a.").append(endTimeField).append("-t1a.").append(startTimeField)
-      .append(") AS DOUBLE PRECISION)) AS activitycount FROM (SELECT DISTINCT ");
+      .append(") AS DOUBLE PRECISION)")
+      .append(" AS activitycount FROM (SELECT DISTINCT ");
     addBucketExtract(sb,"",entityIdentifierField,idBucket);
     sb.append(" AS bucket,").append(endTimeField).append(" FROM ").append(getTableName());
     addCriteria(sb,"",connectionName,filterCriteria,false);
@@ -373,7 +382,16 @@ public class RepositoryHistoryManager ex
     sb.append(" AND t1a.").append(startTimeField).append("<t0a.").append(endTimeField)
       .append(" AND t1a.").append(endTimeField).append(">t0a.").append(endTimeField).append("-").append(intervalString);
     addCriteria(sb,"t1a.",connectionName,filterCriteria,true);
-    sb.append(" GROUP BY bucket,windowstart,windowend) t2 ORDER BY bucket ASC, activitycount
DESC) t3) t4");
+    sb.append(") t6a GROUP BY bucket,windowstart,windowend) t2 ORDER BY bucket ASC, activitycount
DESC) t3");
+
+    Map otherColumns = new HashMap();
+    otherColumns.put("idbucket","t3.bucket");
+    otherColumns.put("activitycount","t3.activitycount");
+    otherColumns.put("starttime","t3.windowstart");
+    otherColumns.put("endtime","t3.windowend");
+    String filteredQuery = constructDistinctOnClause(sb.toString(),new String[]{"idbucket"},otherColumns);
+    sb = new StringBuffer("SELECT * FROM (");
+    sb.append(filteredQuery).append(") t4");
     addOrdering(sb,new String[]{"activitycount","starttime","endtime","idbucket"},sort);
     addLimits(sb,startRow,maxRowCount);
     return performQuery(sb.toString(),null,null,null,maxRowCount);
@@ -416,20 +434,25 @@ public class RepositoryHistoryManager ex
     // items to the list.  One is based on the start time of the current record; the other
is based on the
     // end time of the current record.  That's why there are two inner clauses with a UNION.
 
-    StringBuffer sb = new StringBuffer("SELECT * FROM (SELECT DISTINCT ON (idbucket) t3.bucket
AS idbucket, t3.bytecount AS bytecount, t3.windowstart AS starttime, t3.windowend AS endtime
FROM (SELECT * FROM (SELECT ");
+    StringBuffer sb = new StringBuffer();
+    sb.append("(SELECT * FROM (SELECT t6.bucket AS bucket,")
+      .append("t6.windowstart AS windowstart, t6.windowend AS windowend, SUM(t6.bytecount)
AS bytecount")
+      .append(" FROM (SELECT ");
 
     // Turn the interval into a string, since we'll need it a lot.
     String intervalString = new Long(interval).toString();
 
     sb.append("t0.bucket AS bucket, t0.").append(startTimeField).append(" AS windowstart,
t0.")
-      .append(startTimeField).append("+").append(intervalString).append(" AS windowend, SUM(t1.").append(dataSizeField)
+      .append(startTimeField).append("+").append(intervalString).append(" AS windowend, ")
+      .append("t1.").append(dataSizeField)
       .append(" * ((CASE WHEN t0.")
       .append(startTimeField).append("+").append(intervalString).append("<t1.").append(endTimeField)
       .append(" THEN t0.").append(startTimeField).append("+").append(intervalString).append("
ELSE t1.")
       .append(endTimeField).append(" END) - (CASE WHEN t0.").append(startTimeField).append(">t1.").append(startTimeField)
       .append(" THEN t0.").append(startTimeField).append(" ELSE t1.").append(startTimeField)
       .append(" END)) / (t1.").append(endTimeField).append("-t1.").append(startTimeField)
-      .append(")) AS bytecount FROM (SELECT DISTINCT ");
+      .append(")")
+      .append(" AS bytecount FROM (SELECT DISTINCT ");
     addBucketExtract(sb,"",entityIdentifierField,idBucket);
     sb.append(" AS bucket,").append(startTimeField).append(" FROM ").append(getTableName());
     addCriteria(sb,"",connectionName,filterCriteria,false);
@@ -440,9 +463,12 @@ public class RepositoryHistoryManager ex
     sb.append(" AND t1.").append(startTimeField).append("<t0.").append(startTimeField).append("+").append(intervalString)
       .append(" AND t1.").append(endTimeField).append(">t0.").append(startTimeField);
     addCriteria(sb,"t1.",connectionName,filterCriteria,true);
-    sb.append(" GROUP BY bucket,windowstart,windowend UNION SELECT ")
+    sb.append(") t6 GROUP BY bucket,windowstart,windowend UNION SELECT t6a.bucket AS bucket,")
+      .append("t6a.windowstart AS windowstart, t6a.windowend AS windowend, SUM(t6a.bytecount)
AS bytecount")
+      .append(" FROM (SELECT ")
       .append("t0a.bucket AS bucket, t0a.").append(endTimeField).append("-").append(intervalString).append("
AS windowstart, t0a.")
-      .append(endTimeField).append(" AS windowend, SUM(t1a.").append(dataSizeField).append("
* ((CASE WHEN t0a.")
+      .append(endTimeField).append(" AS windowend, ")
+      .append("t1a.").append(dataSizeField).append(" * ((CASE WHEN t0a.")
       .append(endTimeField).append("<t1a.").append(endTimeField)
       .append(" THEN t0a.").append(endTimeField).append(" ELSE t1a.")
       .append(endTimeField).append(" END) - (CASE WHEN t0a.").append(endTimeField).append("-").append(intervalString)
@@ -450,7 +476,8 @@ public class RepositoryHistoryManager ex
       .append(" THEN t0a.").append(endTimeField).append("-").append(intervalString).append("
ELSE t1a.")
       .append(startTimeField)
       .append(" END)) / (t1a.").append(endTimeField).append("-t1a.").append(startTimeField)
-      .append(")) AS bytecount FROM (SELECT DISTINCT ");
+      .append(")")
+      .append(" AS bytecount FROM (SELECT DISTINCT ");
     addBucketExtract(sb,"",entityIdentifierField,idBucket);
     sb.append(" AS bucket,").append(endTimeField).append(" FROM ").append(getTableName());
     addCriteria(sb,"",connectionName,filterCriteria,false);
@@ -461,8 +488,16 @@ public class RepositoryHistoryManager ex
     sb.append(" AND t1a.").append(startTimeField).append("<t0a.").append(endTimeField)
       .append(" AND t1a.").append(endTimeField).append(">t0a.").append(endTimeField).append("-").append(intervalString);
     addCriteria(sb,"t1a.",connectionName,filterCriteria,true);
-    sb.append(" GROUP BY bucket,windowstart,windowend) t2 ORDER BY bucket ASC, bytecount
DESC) t3) t4");
-
+    sb.append(") t6a GROUP BY bucket,windowstart,windowend) t2 ORDER BY bucket ASC, bytecount
DESC) t3");
+    
+    Map otherColumns = new HashMap();
+    otherColumns.put("idbucket","t3.bucket");
+    otherColumns.put("bytecount","t3.bytecount");
+    otherColumns.put("starttime","t3.windowstart");
+    otherColumns.put("endtime","t3.windowend");
+    String filteredQuery = constructDistinctOnClause(sb.toString(),new String[]{"idbucket"},otherColumns);
+    sb = new StringBuffer("SELECT * FROM (");
+    sb.append(filteredQuery).append(") t4");
     addOrdering(sb,new String[]{"bytecount","starttime","endtime","idbucket"},sort);
     addLimits(sb,startRow,maxRowCount);
     return performQuery(sb.toString(),null,null,null,maxRowCount);



Mime
View raw message