manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1605406 - /manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
Date Wed, 25 Jun 2014 13:56:09 GMT
Author: kwright
Date: Wed Jun 25 13:56:08 2014
New Revision: 1605406

URL: http://svn.apache.org/r1605406
Log:
If extracting update handler is off, only accept character content types

Modified:
    manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java

Modified: manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java?rev=1605406&r1=1605405&r2=1605406&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-981/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java Wed Jun 25 13:56:08 2014
@@ -25,6 +25,8 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
 
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
@@ -78,6 +80,8 @@ public class SolrConnector extends org.a
   protected String excludedMimeTypesString = null;
   /** Excluded mime types */
   protected Map<String,String> excludedMimeTypes = null;
+  /** Use extracting update handler? */
+  protected boolean useExtractUpdateHandler = true;
   
   /** Whether or not to commit */
   protected boolean doCommits = false;
@@ -162,6 +166,7 @@ public class SolrConnector extends org.a
     includedMimeTypes = null;
     excludedMimeTypesString = null;
     excludedMimeTypes = null;
+    useExtractUpdateHandler = true;
     super.disconnect();
   }
 
@@ -208,7 +213,7 @@ public class SolrConnector extends org.a
         mimeTypeAttributeName = null;
 
       String contentAttributeName = "content";	// ??? -- should be settable
-      boolean useExtractUpdateHandler = true;   // ???
+      useExtractUpdateHandler = true;   // ???
       
       String commits = params.getParameter(SolrConfig.PARAM_COMMITS);
       if (commits == null || commits.length() == 0)
@@ -467,6 +472,15 @@ public class SolrConnector extends org.a
     return sp.toPackedString();
   }
 
+  private final static Set<String> acceptableMimeTypes = new HashSet<String>();
+  static
+  {
+    acceptableMimeTypes.add("text/plain;charset=utf-8");
+    acceptableMimeTypes.add("text/plain;charset=ascii");
+    acceptableMimeTypes.add("text/plain;charset=us-ascii");
+    acceptableMimeTypes.add("text/plain");
+  }
+
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
   * unusable documents that will be passed to this output connector.
   *@param outputDescription is the document's output version.
@@ -477,11 +491,15 @@ public class SolrConnector extends org.a
     throws ManifoldCFException, ServiceInterruption
   {
     getSession();
-    if (includedMimeTypes != null && includedMimeTypes.get(mimeType) == null)
-      return false;
-    if (excludedMimeTypes != null && excludedMimeTypes.get(mimeType) != null)
-      return false;
-    return super.checkMimeTypeIndexable(outputDescription,mimeType);
+    if (useExtractUpdateHandler)
+    {
+      if (includedMimeTypes != null && includedMimeTypes.get(mimeType) == null)
+        return false;
+      if (excludedMimeTypes != null && excludedMimeTypes.get(mimeType) != null)
+        return false;
+      return super.checkMimeTypeIndexable(outputDescription,mimeType);
+    }
+    return acceptableMimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
   }
 
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors



Mime
View raw message