incubator-connectors-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From piergior...@apache.org
Subject svn commit: r1301100 - in /incubator/lcf/branches/CONNECTORS-423: ./ connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/
Date Thu, 15 Mar 2012 17:19:23 GMT
Author: piergiorgio
Date: Thu Mar 15 17:19:23 2012
New Revision: 1301100

URL: http://svn.apache.org/viewvc?rev=1301100&view=rev
Log:
Some ElasticSearch work for CONNECTORS-423:
- added the connector-label parameter to the Ant script
- added support for indexing binaries using the Attachment Plugin
- added metadata support

Modified:
    incubator/lcf/branches/CONNECTORS-423/build.xml
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConfig.java
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchDelete.java
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchIndex.java
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchParam.java
    incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java

Modified: incubator/lcf/branches/CONNECTORS-423/build.xml
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/build.xml?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/build.xml (original)
+++ incubator/lcf/branches/CONNECTORS-423/build.xml Thu Mar 15 17:19:23 2012
@@ -1634,7 +1634,8 @@
             <param name="connector-name" value="elasticsearch"/>
         </antcall>
         <antcall target="general-add-output-connector">
-            <param name="connector-name" value="ElasticSearch"/>
+            <param name="connector-name" value="elasticsearch"/>
+        	<param name="connector-label" value="ElasticSearch"/>
             <param name="connector-class" value="org.apache.manifoldcf.agents.output.elasticsearch.ElasticSearchConnector"/>
         </antcall>
     </target>

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchAction.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConfig.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConfig.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConfig.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConfig.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchConfig.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchConnection.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchConnector.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
@@ -86,8 +88,6 @@ public class ElasticSearchConnector exte
 
   private MultiThreadedHttpConnectionManager connectionManager = null;
   private HttpClient client = null;
-  private String specsCacheOutpuDescription = null;
-  private ElasticSearchSpecs specsCache = null;
 
   public ElasticSearchConnector()
   {
@@ -319,8 +319,8 @@ public class ElasticSearchConnector exte
     ElasticSearchConfig config = getConfigParameters(null);
     InputStream inputStream = document.getBinaryStream();
     long startTime = System.currentTimeMillis();
-    ElasticSearchIndex oi = new ElasticSearchIndex(client, documentURI,
-      inputStream, config);
+    ElasticSearchIndex oi = new ElasticSearchIndex(client, documentURI, 
+        document, inputStream, config);
     activities.recordActivity(startTime, ELASTICSEARCH_INDEXATION_ACTIVITY,
       document.getBinaryLength(), documentURI, oi.getResult().name(),
       oi.getResultDescription());

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchDelete.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchDelete.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchDelete.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchDelete.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchDelete.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
@@ -17,9 +19,8 @@
 
 package org.apache.manifoldcf.agents.output.elasticsearch;
 
-import org.apache.commons.httpclient.methods.DeleteMethod;
 import org.apache.commons.httpclient.HttpClient;
-import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.httpclient.methods.DeleteMethod;
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 
 public class ElasticSearchDelete extends ElasticSearchConnection

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchIndex.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchIndex.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchIndex.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchIndex.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchIndex.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
@@ -21,12 +23,13 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintWriter;
+import java.util.Iterator;
 
-import org.apache.commons.httpclient.methods.PutMethod;
 import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.methods.PutMethod;
 import org.apache.commons.httpclient.methods.RequestEntity;
-import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.io.IOUtils;
+import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.core.common.Base64;
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 
@@ -36,11 +39,13 @@ public class ElasticSearchIndex extends 
   private class IndexRequestEntity implements RequestEntity
   {
 
+    private RepositoryDocument document;
     private InputStream inputStream;
 
-    public IndexRequestEntity(InputStream inputStream)
+    public IndexRequestEntity(RepositoryDocument document, InputStream inputStream)
       throws ManifoldCFException
     {
+      this.document = document;
       this.inputStream = inputStream;
     }
 
@@ -65,14 +70,41 @@ public class ElasticSearchIndex extends 
       try
       {
         pw.print("{");
-        //pw.print("\"fieldName\" : ");
-        //pw.print("\"" + documentURI + "\"" + ",");
-        //pw.print("\"fileName\" : ");
-        //pw.print("\"" + fileName + "\"" + ",");
-        pw.print("\"binaryValue\" : \"");
-        Base64 base64 = new Base64();
-        base64.encodeStream(inputStream, pw);
-        pw.print("\"");
+        Iterator<String> i = document.getFields();
+        boolean existentFields = false;
+        while (i.hasNext()){
+          String fieldName = i.next();
+          String[] fieldValues = document.getFieldAsStrings(fieldName);
+          if(fieldValues.length>1){
+            for(int j=0; j<fieldValues.length; j++){
+              String fieldValue = fieldValues[j];
+              pw.print("\""+fieldName+"\" : \""+fieldValue+"\"");
+              if(j<fieldValues.length-1){
+                pw.print(",");
+              }
+              existentFields = true;
+            }
+          } else if(fieldValues.length==1){
+            String fieldValue = fieldValues[0];
+            pw.print("\""+fieldName+"\" : \""+fieldValue+"\"");
+            if(i.hasNext()){
+              pw.print(",");
+            }
+            existentFields = true;
+          }
+        }
+        
+        if(inputStream!=null){
+          if(existentFields){
+            pw.print(",");
+          }
+          pw.print("\"type\" : \"attachment\",");
+          pw.print("\"file\" : \"");
+          Base64 base64 = new Base64();
+          base64.encodeStream(inputStream, pw);
+          pw.print("\"");
+        }
+        
         pw.print("}");
       } catch (ManifoldCFException e)
       {
@@ -84,8 +116,8 @@ public class ElasticSearchIndex extends 
     }
   }
 
-  public ElasticSearchIndex(HttpClient client, String documentURI, InputStream inputStream,
-      ElasticSearchConfig config) throws ManifoldCFException
+  public ElasticSearchIndex(HttpClient client, String documentURI, RepositoryDocument document,
+      InputStream inputStream, ElasticSearchConfig config) throws ManifoldCFException
   {
     super(config, client);
     
@@ -101,7 +133,7 @@ public class ElasticSearchIndex extends 
 
     StringBuffer url = getApiUrl(config.getIndexType() + "/" + idField, false);
     PutMethod put = new PutMethod(url.toString());
-    RequestEntity entity = new IndexRequestEntity(inputStream);
+    RequestEntity entity = new IndexRequestEntity(document, inputStream);
     put.setRequestEntity(entity);
     call(put);
     if ("true".equals(checkJson(jsonStatus)))

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchParam.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchParam.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchParam.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchParam.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchParam.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with

Modified: incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java?rev=1301100&r1=1301099&r2=1301100&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java (original)
+++ incubator/lcf/branches/CONNECTORS-423/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java Thu Mar 15 17:19:23 2012
@@ -1,3 +1,5 @@
+/* $Id: ElasticSearchSpecs.java 1299512 2012-03-12 00:58:38Z piergiorgio $ */
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with



Mime
View raw message