manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From piergior...@apache.org
Subject svn commit: r1600691 - in /manifoldcf/trunk: ./ connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/
Date Thu, 05 Jun 2014 16:10:52 GMT
Author: piergiorgio
Date: Thu Jun  5 16:10:50 2014
New Revision: 1600691

URL: http://svn.apache.org/r1600691
Log:
Now the CMIS Connector allows creating indexes only for the properties specified in the select
clause (CONNECTORS-950)

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
    manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnectorUtils.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1600691&r1=1600690&r2=1600691&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Thu Jun  5 16:10:50 2014
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 1.7-dev =====================
 
+CONNECTORS-950:CMIS Connector should ingest only the properties specified in the select clause
+(Piergiorgio Lucidi)
+
 CONNECTORS-916: Supply an Amazon Cloud Search connector.
 (Takumi Yoshida, Karl Wright)
 

Modified: manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1600691&r1=1600690&r2=1600691&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Thu Jun  5 16:10:50 2014
@@ -141,6 +141,8 @@ public class CmisRepositoryConnector ext
 
   protected static final long timeToRelease = 300000L;
   protected long lastSessionFetch = -1L;
+  
+  protected String cmisQuery = StringUtils.EMPTY;
 
   /**
    * Constructor
@@ -647,7 +649,7 @@ public class CmisRepositoryConnector ext
 
     getSession();
 
-    String cmisQuery = StringUtils.EMPTY;
+    cmisQuery = StringUtils.EMPTY;
     int i = 0;
     while (i < spec.getChildCount()) {
       SpecificationNode sn = spec.getChild(i);
@@ -1111,93 +1113,99 @@ public class CmisRepositoryConnector ext
           String id = StringUtils.EMPTY;
           for (Property<?> property : properties) {
             String propertyId = property.getId();
-            if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
-              id = (String) property.getValue();
-
-            if (property.getValue() !=null 
-                || property.getValues() != null) {
-              PropertyType propertyType = property.getType();
-
-              switch (propertyType) {
-
-              case STRING:
-              case ID:
-              case URI:
-              case HTML:
-                if(property.isMultiValued()){
-                  List<String> htmlPropertyValues = (List<String>) property.getValues();
-                  for (String htmlPropertyValue : htmlPropertyValues) {
-                    rd.addField(propertyId, htmlPropertyValue);
-                  }
-                } else {
-                  String stringValue = (String) property.getValue();
-                  if(StringUtils.isNotEmpty(stringValue)){
-                    rd.addField(propertyId, stringValue);
-                  }
-                }
-                break;
-     
-              case BOOLEAN:
-                if(property.isMultiValued()){
-                  List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
-                  for (Boolean booleanPropertyValue : booleanPropertyValues) {
-                    rd.addField(propertyId, booleanPropertyValue.toString());
-                  }
-                } else {
-                  Boolean booleanValue = (Boolean) property.getValue();
-                  if(booleanValue!=null){
-                    rd.addField(propertyId, booleanValue.toString());
-                  }
-                }
-                break;
-
-              case INTEGER:
-                if(property.isMultiValued()){
-                  List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
-                  for (BigInteger integerPropertyValue : integerPropertyValues) {
-                    rd.addField(propertyId, integerPropertyValue.toString());
-                  }
-                } else {
-                  BigInteger integerValue = (BigInteger) property.getValue();
-                  if(integerValue!=null){
-                    rd.addField(propertyId, integerValue.toString());
-                  }
-                }
-                break;
-
-              case DECIMAL:
-                if(property.isMultiValued()){
-                  List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
-                  for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
-                    rd.addField(propertyId, decimalPropertyValue.toString());
-                  }
-                } else {
-                  BigDecimal decimalValue = (BigDecimal) property.getValue();
-                  if(decimalValue!=null){
-                    rd.addField(propertyId, decimalValue.toString());
-                  }
-                }
-                break;
-
-              case DATETIME:
-                if(property.isMultiValued()){
-                  List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
-                  for (GregorianCalendar datePropertyValue : datePropertyValues) {
-                    rd.addField(propertyId,
-                        ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
-                  }
-                } else {
-                  GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
-                  if(dateValue!=null){
-                    rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+            
+            if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+              
+              if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
+                id = (String) property.getValue();
+  
+                if (property.getValue() !=null 
+                    || property.getValues() != null) {
+                  PropertyType propertyType = property.getType();
+    
+                  switch (propertyType) {
+    
+                  case STRING:
+                  case ID:
+                  case URI:
+                  case HTML:
+                    if(property.isMultiValued()){
+                      List<String> htmlPropertyValues = (List<String>) property.getValues();
+                      for (String htmlPropertyValue : htmlPropertyValues) {
+                        rd.addField(propertyId, htmlPropertyValue);
+                      }
+                    } else {
+                      String stringValue = (String) property.getValue();
+                      if(StringUtils.isNotEmpty(stringValue)){
+                        rd.addField(propertyId, stringValue);
+                      }
+                    }
+                    break;
+         
+                  case BOOLEAN:
+                    if(property.isMultiValued()){
+                      List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
+                      for (Boolean booleanPropertyValue : booleanPropertyValues) {
+                        rd.addField(propertyId, booleanPropertyValue.toString());
+                      }
+                    } else {
+                      Boolean booleanValue = (Boolean) property.getValue();
+                      if(booleanValue!=null){
+                        rd.addField(propertyId, booleanValue.toString());
+                      }
+                    }
+                    break;
+    
+                  case INTEGER:
+                    if(property.isMultiValued()){
+                      List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
+                      for (BigInteger integerPropertyValue : integerPropertyValues) {
+                        rd.addField(propertyId, integerPropertyValue.toString());
+                      }
+                    } else {
+                      BigInteger integerValue = (BigInteger) property.getValue();
+                      if(integerValue!=null){
+                        rd.addField(propertyId, integerValue.toString());
+                      }
+                    }
+                    break;
+    
+                  case DECIMAL:
+                    if(property.isMultiValued()){
+                      List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
+                      for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
+                        rd.addField(propertyId, decimalPropertyValue.toString());
+                      }
+                    } else {
+                      BigDecimal decimalValue = (BigDecimal) property.getValue();
+                      if(decimalValue!=null){
+                        rd.addField(propertyId, decimalValue.toString());
+                      }
+                    }
+                    break;
+    
+                  case DATETIME:
+                    if(property.isMultiValued()){
+                      List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
+                      for (GregorianCalendar datePropertyValue : datePropertyValues) {
+                        rd.addField(propertyId,
+                            ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+                      }
+                    } else {
+                      GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
+                      if(dateValue!=null){
+                        rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+                      }
+                    }
+                    break;
+    
+                  default:
+                    break;
                   }
                 }
-                break;
-
-              default:
-                break;
+                
               }
-            }
+            
           }
           
           //ingestion

Modified: manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnectorUtils.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnectorUtils.java?rev=1600691&r1=1600690&r2=1600691&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnectorUtils.java (original)
+++ manifoldcf/trunk/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnectorUtils.java Thu Jun  5 16:10:50 2014
@@ -40,6 +40,7 @@ public class CmisRepositoryConnectorUtil
   private static final String SEP = " ";
   private static final String SELECT_STAR_CLAUSE = "select *";
   private static final String OBJECT_ID_TERM = "cmis:objectId,";
+  private static final String SELECT_CLAUSE_TERM_SEP = ",";
   
   public static final String getDocumentURL(final Document document, final Session session) {
     String link = null;
@@ -66,24 +67,7 @@ public class CmisRepositoryConnectorUtil
    */
   public static String getCmisQueryWithObjectId(String cmisQuery){
     String cmisQueryResult = StringUtils.EMPTY;
-    StringTokenizer cmisQueryTokenized = new StringTokenizer(cmisQuery.trim());
-    String selectClause = StringUtils.EMPTY;
-    boolean firstTerm = true;
-    while(cmisQueryTokenized.hasMoreElements()){
-        String term = cmisQueryTokenized.nextToken();
-        if(!term.equalsIgnoreCase(FROM_TOKEN)){
-          if(firstTerm){
-            selectClause+=term;
-            firstTerm = false;
-          } else {
-            selectClause+=SEP+term;
-          }
-          
-        } else {
-          break;
-        }
-    }
-    
+    String selectClause = getSelectClause(cmisQuery);
     if(selectClause.equalsIgnoreCase(SELECT_STAR_CLAUSE)){
       cmisQueryResult = cmisQuery;
     } else {
@@ -103,8 +87,63 @@ public class CmisRepositoryConnectorUtil
       }
       cmisQueryResult = StringUtils.replaceOnce(cmisQuery, secondTerm, OBJECT_ID_TERM + secondTerm);
     }
-    
     return cmisQueryResult;
   }
+
+  /**
+   * Utility method to understand if a property must be indexed or not
+   * @param cmisQuery
+   * @param propertyId
+   * @return TRUE if the property is included in the select clause of the query, otherwise it will return FALSE
+   */
+  public static boolean existsInSelectClause(String cmisQuery, String propertyId) {
+    String selectClause = getSelectClause(cmisQuery);
+    if(selectClause.startsWith(SELECT_STAR_CLAUSE)){
+      return true;
+    } else {
+      StringTokenizer cmisQueryTokenized = new StringTokenizer(cmisQuery.trim());
+      while(cmisQueryTokenized.hasMoreElements()){
+          String term = cmisQueryTokenized.nextToken();
+          if(!term.equalsIgnoreCase(FROM_TOKEN)){
+            if(term.equalsIgnoreCase(propertyId)){
+              return true;
+            } else if(StringUtils.contains(term, SELECT_CLAUSE_TERM_SEP)){
+              //in this case means that we have: select cmis:objectId,cmis:name from ...
+              StringTokenizer termsTokenized = new StringTokenizer(term, SELECT_CLAUSE_TERM_SEP);
+              while(termsTokenized.hasMoreElements()){
+                String termTokenized = termsTokenized.nextToken().trim();
+                if(termTokenized.equalsIgnoreCase(propertyId)){
+                  return true;
+                }
+              }
+            }
+          } else {
+            break;
+          }
+      }
+      return false;
+    }
+  }
+
+  private static String getSelectClause(String cmisQuery) {
+    StringTokenizer cmisQueryTokenized = new StringTokenizer(cmisQuery.trim());
+    String selectClause = StringUtils.EMPTY;
+    boolean firstTerm = true;
+    while(cmisQueryTokenized.hasMoreElements()){
+        String term = cmisQueryTokenized.nextToken();
+        if(!term.equalsIgnoreCase(FROM_TOKEN)){
+          if(firstTerm){
+            selectClause+=term;
+            firstTerm = false;
+          } else {
+            selectClause+=SEP+term;
+          }
+          
+        } else {
+          break;
+        }
+    }
+    return selectClause;
+  }
   
 }
\ No newline at end of file



Mime
View raw message