manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1596720 - in /manifoldcf/branches/CONNECTORS-916-rebased: ./ connectors/ connectors/amazoncloudsearch/ connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/ connectors/amazoncloudsearch...
Date Thu, 22 May 2014 00:57:29 GMT
Author: kwright
Date: Thu May 22 00:57:29 2014
New Revision: 1596720

URL: http://svn.apache.org/r1596720
Log:
Pull up CONNECTORS-916 changes

Added:
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/
      - copied from r1585531, manifoldcf/branches/CONNECTORS-916-2/connectors/amazoncloudsearch/
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java
      - copied unchanged from r1596719, manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/Messages.java
      - copied unchanged from r1596719, manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/Messages.java
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/native2ascii/
      - copied from r1596719, manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/native2ascii/
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/resources/
      - copied from r1596719, manifoldcf/branches/CONNECTORS-916/connectors/amazoncloudsearch/connector/src/main/resources/
Modified:
    manifoldcf/branches/CONNECTORS-916-rebased/   (props changed)
    manifoldcf/branches/CONNECTORS-916-rebased/build.xml
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/build.xml
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/pom.xml
    manifoldcf/branches/CONNECTORS-916-rebased/connectors/pom.xml
    manifoldcf/branches/CONNECTORS-916-rebased/lib-license/LICENSE.txt

Propchange: manifoldcf/branches/CONNECTORS-916-rebased/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-916:r1585365-1585526,1585532-1596719
  Merged /manifoldcf/branches/CONNECTORS-916-2:r1585527-1585531

Modified: manifoldcf/branches/CONNECTORS-916-rebased/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916-rebased/build.xml?rev=1596720&r1=1596719&r2=1596720&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916-rebased/build.xml (original)
+++ manifoldcf/branches/CONNECTORS-916-rebased/build.xml Thu May 22 00:57:29 2014
@@ -1023,7 +1023,7 @@ Use Apache Forrest version forrest-0.9-d
         <mkdir dir="lib"/>
         <antcall target="download-via-maven"><param name="target" value="lib"/>
             <param name="project-path" value="org/apache/poi"/>
-            <param name="artifact-version" value="3.7"/>
+            <param name="artifact-version" value="3.10-beta2"/>
             <param name="artifact-name" value="poi"/>
             <param name="artifact-type" value="jar"/>
         </antcall>
@@ -1579,7 +1579,213 @@ Use Apache Forrest version forrest-0.9-d
         </antcall>
     </target>
     
-    <target name="make-core-deps" depends="download-jira-client,download-google-api-client,download-dropbox-client,download-solrj,download-zookeeper,download-httpcomponents,download-json,download-hsqldb,download-xerces,download-commons,download-elasticsearch-plugin,download-solr-plugins,download-sharepoint-plugins,download-jstl,download-xmlgraphics-commons,download-wstx-asl,download-xmlsec,download-xml-apis,download-wss4j,download-velocity,download-streambuffer,download-stax,download-servlet-api,download-xml-resolver,download-osgi,download-opensaml,download-mimepull,download-mail,download-log4j,download-junit,download-jaxws,download-glassfish,download-jaxb,download-tomcat,download-h2,download-h2-support,download-geronimo-specs,download-fop,download-derby,download-postgresql,download-axis,download-saaj,download-wsdl4j,download-castor,download-jetty,download-slf4j,download-xalan,download-activation,download-avalon-framework,download-poi,download-chemistry,download-ecj,download-hadoop,download-protobuf">
+    <target name="download-tika">
+        <mkdir dir="lib"/>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/tika"/>
+            <param name="artifact-version" value="1.5"/>
+            <param name="artifact-name" value="tika-core"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/tika"/>
+            <param name="artifact-version" value="1.5"/>
+            <param name="artifact-name" value="tika-parsers"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/ccil/cowan/tagsoup"/>
+            <param name="artifact-version" value="1.2.1"/>
+            <param name="artifact-name" value="tagsoup"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/james"/>
+            <param name="artifact-name" value="apache-mime4j-core"/>
+            <param name="artifact-version" value="0.7.2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/james"/>
+            <param name="artifact-name" value="apache-mime4j-dom"/>
+            <param name="artifact-version" value="0.7.2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/gagravarr"/>
+            <param name="artifact-name" value="vorbis-java-tika"/>
+            <param name="artifact-version" value="0.1"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/poi"/>
+            <param name="artifact-name" value="poi-scratchpad"/>
+            <param name="artifact-version" value="3.10-beta2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="com/drewnoakes"/>
+            <param name="artifact-name" value="metadata-extractor"/>
+            <param name="artifact-version" value="2.6.2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/aspectj"/>
+            <param name="artifact-name" value="aspectjrt"/>
+            <param name="artifact-version" value="1.6.11"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="com/uwyn"/>
+            <param name="artifact-name" value="jhighlight"/>
+            <param name="artifact-version" value="1.0"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/xmlbeans"/>
+            <param name="artifact-name" value="xmlbeans"/>
+            <param name="artifact-version" value="2.3.0"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/bouncycastle"/>
+            <param name="artifact-name" value="bcprov-jdk15"/>
+            <param name="artifact-version" value="1.45"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/tukaani"/>
+            <param name="artifact-name" value="xz"/>
+            <param name="artifact-version" value="1.2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/pdfbox"/>
+            <param name="artifact-name" value="jempbox"/>
+            <param name="artifact-version" value="1.8.4"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/pdfbox"/>
+            <param name="artifact-name" value="pdfbox"/>
+            <param name="artifact-version" value="1.8.4"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/pdfbox"/>
+            <param name="artifact-name" value="fontbox"/>
+            <param name="artifact-version" value="1.8.4"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/commons"/>
+            <param name="artifact-name" value="commons-compress"/>
+            <param name="artifact-version" value="1.5"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="com/googlecode/juniversalchardet"/>
+            <param name="artifact-name" value="juniversalchardet"/>
+            <param name="artifact-version" value="1.0.3"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="edu/ucar"/>
+            <param name="artifact-name" value="netcdf"/>
+            <param name="artifact-version" value="4.2-min"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="de/l3s/boilerpipe"/>
+            <param name="artifact-name" value="boilerpipe"/>
+            <param name="artifact-version" value="1.1.0"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="dom4j"/>
+            <param name="artifact-name" value="dom4j"/>
+            <param name="artifact-version" value="1.6.1"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/ow2/asm"/>
+            <param name="artifact-name" value="asm-debug-all"/>
+            <param name="artifact-version" value="4.1"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="com/adobe/xmp"/>
+            <param name="artifact-name" value="xmpcore"/>
+            <param name="artifact-version" value="5.1.2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/gagravarr"/>
+            <param name="artifact-name" value="vorbis-java-core"/>
+            <param name="artifact-version" value="0.1"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/poi"/>
+            <param name="artifact-name" value="poi-ooxml"/>
+            <param name="artifact-version" value="3.10-beta2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/poi"/>
+            <param name="artifact-name" value="poi-ooxml-schemas"/>
+            <param name="artifact-version" value="3.10-beta2"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/bouncycastle"/>
+            <param name="artifact-name" value="bcmail-jdk15"/>
+            <param name="artifact-version" value="1.45"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="jdom"/>
+            <param name="artifact-name" value="jdom"/>
+            <param name="artifact-version" value="1.0"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="org/apache/geronimo/specs"/>
+            <param name="artifact-name" value="geronimo-stax-api_1.0_spec"/>
+            <param name="artifact-version" value="1.0.1"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="rome"/>
+            <param name="artifact-name" value="rome"/>
+            <param name="artifact-version" value="0.9"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="com/googlecode/mp4parser"/>
+            <param name="artifact-name" value="isoparser"/>
+            <param name="artifact-version" value="1.0-RC-1"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+    </target>
+	
+    <target name="download-jackson">
+        <mkdir dir="lib"/>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="com/fasterxml/jackson/core"/>
+            <param name="artifact-version" value="2.1.3"/>
+            <param name="artifact-name" value="jackson-databind"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+        <antcall target="download-via-maven"><param name="target" value="lib"/>
+            <param name="project-path" value="com/fasterxml/jackson/core"/>
+            <param name="artifact-version" value="2.1.2"/>
+            <param name="artifact-name" value="jackson-annotations"/>
+            <param name="artifact-type" value="jar"/>
+        </antcall>
+    </target>
+	
+    <target name="make-core-deps" depends="download-jira-client,download-google-api-client,download-dropbox-client,download-solrj,download-zookeeper,download-httpcomponents,download-json,download-hsqldb,download-xerces,download-commons,download-elasticsearch-plugin,download-solr-plugins,download-sharepoint-plugins,download-jstl,download-xmlgraphics-commons,download-wstx-asl,download-xmlsec,download-xml-apis,download-wss4j,download-velocity,download-streambuffer,download-stax,download-servlet-api,download-xml-resolver,download-osgi,download-opensaml,download-mimepull,download-mail,download-log4j,download-junit,download-jaxws,download-glassfish,download-jaxb,download-tomcat,download-h2,download-h2-support,download-geronimo-specs,download-fop,download-derby,download-postgresql,download-axis,download-saaj,download-wsdl4j,download-castor,download-jetty,download-slf4j,download-xalan,download-activation,download-avalon-framework,download-poi,download-chemistry,download-ecj,download-hadoop,download-protobuf,download-tika,download-jackson">
         <copy todir="lib">
             <fileset dir="lib-license" includes="*.txt"/>
         </copy>

Modified: manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/build.xml?rev=1596720&r1=1585531&r2=1596720&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/build.xml (original)
+++ manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/build.xml Thu May 22 00:57:29 2014
@@ -36,10 +36,43 @@
             <include name="httpclient*.jar"/>
             <include name="jackson-core*.jar"/>
             <include name="jackson-databind*.jar"/>
-        	<include name="jackson-annotations*.jar"/>
+            <include name="jackson-annotations*.jar"/>
             <include name="tika-core*.jar"/>
             <include name="tika-parsers*.jar"/>
             <include name="tagsoup*.jar"/>
+            <include name="poi*.jar"/>
+            <include name="vorbis-java-tika*.jar"/>
+            <include name="vorbis-java-core*.jar"/>
+            <include name="netcdf*.jar"/>
+            <include name="apache-mime4j-core*.jar"/>
+            <include name="apache-mime4j-dom*.jar"/>
+            <include name="commons-compress*.jar"/>
+            <include name="commons-codec*.jar"/>
+            <include name="pdfbox*.jar"/>
+            <include name="fontbox*.jar"/>
+            <include name="jempbox*.jar"/>
+            <include name="commons-logging*.jar"/>
+            <include name="bcmail-jdk15*.jar"/>
+            <include name="bcprov-jdk15*.jar"/>
+            <include name="poi-scratchpad*.jar"/>
+            <include name="poi-ooxml*.jar"/>
+            <include name="poi-ooxml-schemas*.jar"/>
+            <include name="xmlbeans*.jar"/>
+            <include name="dom4j*.jar"/>
+            <include name="geronimo-stax-api_1.0_spec*.jar"/>
+            <include name="asm-debug-all*.jar"/>
+            <include name="isoparser*.jar"/>
+            <include name="aspectjrt*.jar"/>
+            <include name="metadata-extractor*.jar"/>
+            <include name="xmpcore*.jar"/>
+            <include name="xml-apis*.jar"/>
+            <include name="boilerpipe*.jar"/>
+            <include name="rome*.jar"/>
+            <include name="jdom*.jar"/>
+            <include name="xercesImpl*.jar"/>
+            <include name="vorbis-java-core*.jar"/>
+            <include name="juniversalchardet*.jar"/>
+            <include name="jhighlight*.jar"/>
         </fileset>
     </path>
 
@@ -47,13 +80,46 @@
         <mkdir dir="dist/lib"/>
         <copy todir="dist/lib">
             <fileset dir="../../lib">
-            	<include name="httpclient*.jar"/>
-            	<include name="jackson-core*.jar"/>
-            	<include name="jackson-databind*.jar"/>
-            	<include name="jackson-annotations*.jar"/>
+                <include name="httpclient*.jar"/>
+                <include name="jackson-core*.jar"/>
+                <include name="jackson-databind*.jar"/>
+                <include name="jackson-annotations*.jar"/>
                 <include name="tika-core*.jar"/>
                 <include name="tika-parsers*.jar"/>
                 <include name="tagsoup*.jar"/>
+                <include name="poi*.jar"/>
+                <include name="vorbis-java-tika*.jar"/>
+                <include name="vorbis-java-core*.jar"/>
+                <include name="netcdf*.jar"/>
+                <include name="apache-mime4j-core*.jar"/>
+                <include name="apache-mime4j-dom*.jar"/>
+                <include name="commons-compress*.jar"/>
+                <include name="commons-codec*.jar"/>
+                <include name="pdfbox*.jar"/>
+                <include name="fontbox*.jar"/>
+                <include name="jempbox*.jar"/>
+                <include name="commons-logging*.jar"/>
+                <include name="bcmail-jdk15*.jar"/>
+                <include name="bcprov-jdk15*.jar"/>
+                <include name="poi-scratchpad*.jar"/>
+                <include name="poi-ooxml*.jar"/>
+                <include name="poi-ooxml-schemas*.jar"/>
+                <include name="xmlbeans*.jar"/>
+                <include name="dom4j*.jar"/>
+                <include name="geronimo-stax-api_1.0_spec*.jar"/>
+                <include name="asm-debug-all*.jar"/>
+                <include name="isoparser*.jar"/>
+                <include name="aspectjrt*.jar"/>
+                <include name="metadata-extractor*.jar"/>
+                <include name="xmpcore*.jar"/>
+                <include name="xml-apis*.jar"/>
+                <include name="boilerpipe*.jar"/>
+                <include name="rome*.jar"/>
+                <include name="jdom*.jar"/>
+                <include name="xercesImpl*.jar"/>
+                <include name="vorbis-java-core*.jar"/>
+                <include name="juniversalchardet*.jar"/>
+                <include name="jhighlight*.jar"/>
             </fileset>
         </copy>
     </target>

Modified: manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java?rev=1596720&r1=1585531&r2=1596720&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java Thu May 22 00:57:29 2014
@@ -17,26 +17,31 @@
 * limitations under the License.
 */
 package org.apache.manifoldcf.agents.output.amazoncloudsearch;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InterruptedIOException;
+import java.io.StringReader;
+import java.io.BufferedReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
+import java.util.Locale;
+import java.util.Set;
+import java.util.HashSet;
 
+import org.apache.commons.io.FilenameUtils;
 import org.apache.http.Consts;
 import org.apache.http.HttpEntity;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.ClientProtocolException;
-import org.apache.http.client.HttpClient;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.entity.StringEntity;
-import org.apache.http.impl.DefaultHttpClientConnection;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
 import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.DefaultHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.http.util.EntityUtils;
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
@@ -46,15 +51,21 @@ import org.apache.manifoldcf.agents.inte
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
 import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
-import org.apache.manifoldcf.core.interfaces.ConfigParams;
-import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.ConfigParams;
+import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.IThreadContext;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.IPostParameters;
+import org.apache.manifoldcf.core.interfaces.IPasswordMapperActivity;
+import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.system.ManifoldCF;
+import org.apache.manifoldcf.crawler.system.Logging;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParsingReader;
-import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -64,17 +75,40 @@ import com.fasterxml.jackson.core.JsonPa
 import com.fasterxml.jackson.core.JsonParser;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 
-public class AmazonCloudSearchConnector  extends BaseOutputConnector {
+public class AmazonCloudSearchConnector extends BaseOutputConnector {
 
   /** Ingestion activity */
   public final static String INGEST_ACTIVITY = "document ingest";
   /** Document removal activity */
   public final static String REMOVE_ACTIVITY = "document deletion";
 
+  /** Forward to the javascript to check the configuration parameters */
+  private static final String EDIT_CONFIGURATION_JS = "editConfiguration.js";
+
+  /** Forward to the HTML template to edit the configuration parameters */
+  private static final String EDIT_CONFIGURATION_HTML = "editConfiguration.html";
+  
+  /** Forward to the HTML template to view the configuration parameters */
+  private static final String VIEW_CONFIGURATION_HTML = "viewConfiguration.html";
+
+  /** Forward to the javascript to check the specification parameters for the job */
+  private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+  
+  private static final String EDIT_SPECIFICATION_CONTENTS_HTML = "editSpecification_Contents.html";
+  private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
+  
+  private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+  
   /** Local connection */
   protected HttpPost poster = null;
-
+  
+  /** cloudsearch field name for file body text. */
+  private static final String FILE_BODY_TEXT_FIELDNAME = "f_bodytext";
+  
   /** Constructor.
    */
   public AmazonCloudSearchConnector(){
@@ -127,20 +161,34 @@ public class AmazonCloudSearchConnector 
   protected void getSession()
     throws ManifoldCFException
   {
-    //curl -X POST --upload-file data1.json doc.movies-123456789012.us-east-1.cloudsearch.amazonaws.com/2013-01-01/documents/batch --header "Content-Type:application/json"
-    String documentEndpointUrl = "doc-test1-hjzolhfixtfctmuaezbzinjduu.us-east-1.cloudsearch.amazonaws.com";
-    String urlStr = "https://" + documentEndpointUrl + "/2013-01-01/documents/batch";
+    String serverHost = params.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
+    if (serverHost == null)
+      throw new ManifoldCFException("Server host parameter required");
+    String serverPath = params.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
+    if (serverPath == null)
+      throw new ManifoldCFException("Server path parameter required");
+    String proxyProtocol = params.getParameter(AmazonCloudSearchConfig.PROXY_PROTOCOL);
+    String proxyHost = params.getParameter(AmazonCloudSearchConfig.PROXY_HOST);
+    String proxyPort = params.getParameter(AmazonCloudSearchConfig.PROXY_PORT);
+    
+    // Https is OK here without a custom trust store because we know we are talking to an Amazon instance, which has certs that
+    // are presumably non-custom.
+    String urlStr = "https://" + serverHost + serverPath;
     poster = new HttpPost(urlStr);
     
     //set proxy
-    String proxyHost = System.getenv().get("HTTP_PROXY");
-    if(proxyHost != null)
+    if(proxyHost != null && proxyHost.length() > 0)
     {
-      String host = proxyHost.substring(proxyHost.indexOf("://")+3,proxyHost.lastIndexOf(":"));
-      String port = proxyHost.substring(proxyHost.lastIndexOf(":")+1,proxyHost.length()-1);
-      HttpHost proxy = new HttpHost(host, Integer.parseInt(port), "http");
-      RequestConfig config = RequestConfig.custom().setProxy(proxy).build();
-      poster.setConfig(config);
+      try
+      {
+        HttpHost proxy = new HttpHost(proxyHost, Integer.parseInt(proxyPort), proxyProtocol);
+        RequestConfig config = RequestConfig.custom().setProxy(proxy).build();
+        poster.setConfig(config);
+      }
+      catch (NumberFormatException e)
+      {
+        throw new ManifoldCFException("Number format exception: "+e.getMessage(),e);
+      }
     }
     
     poster.addHeader("Content-Type", "application/json");
@@ -150,47 +198,78 @@ public class AmazonCloudSearchConnector 
   *@return the connection's status as a displayable string.
   */
   @Override
-  public String check()
-    throws ManifoldCFException
-  {
-    try
-    {
+  public String check() throws ManifoldCFException {
+    try {
       getSession();
       String responsbody = postData("[]");
-      
       String status = "";
+      try
+      {
+        status = getStatusFromJsonResponse(responsbody);
+      } catch (ManifoldCFException e)
+      {
+        Logging.connectors.debug(e);
+        return "Could not get status from response body. Check Access Policy setting of your domain of Amazon CloudSearch.: " + e.getMessage();
+      }
+          
+      // check status message
       String message = "";
-      JsonFactory factory = new JsonFactory();
-      JsonParser parser = factory.createJsonParser(responsbody);
-      while (parser.nextToken() != JsonToken.END_OBJECT) {
-        String name = parser.getCurrentName();
-        if("status".equalsIgnoreCase(name)){
-          status = parser.getValueAsString();
-        }else if("errors".equalsIgnoreCase(name)){
-          message = parseMessage(parser);
+      if ("error".equals(status)) {
+        JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+        while (parser.nextToken() != JsonToken.END_OBJECT) {
+          String name = parser.getCurrentName();
+          if ("errors".equalsIgnoreCase(name)) {
+            message = parseMessage(parser);
+            break;
+          }
         }
       }
-      if("error".equalsIgnoreCase(status) &&
-          "Encountered unexpected end of file".equals(message)){
+      if ("error".equalsIgnoreCase(status)
+          && "batch must contain at least one operation".equals(message)) {
         return "Connection working.";
       }
       return "Connection NOT working.";
-    }
-    catch (ClientProtocolException e) {
-      throw new ManifoldCFException(e);
+      
+    } catch (ClientProtocolException e) {
+      Logging.connectors.debug(e);
+      return "Protocol exception: "+e.getMessage();
     } catch (IOException e) {
-      throw new ManifoldCFException(e);
-    }
-  }
-  
-  private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
-    while(parser.nextToken() != JsonToken.END_ARRAY){
-      String name = parser.getCurrentName();
-      if("message".equalsIgnoreCase(name)){
-        return parser.getValueAsString();
-      }
+      Logging.connectors.debug(e);
+      return "IO exception: "+e.getMessage();
+    } catch (ServiceInterruption e) {
+      Logging.connectors.debug(e);
+      return "Transient exception: "+e.getMessage();
     }
-    return null;
+  }
+  
+  private String getStatusFromJsonResponse(String responsbody) throws ManifoldCFException {
+    try {
+      JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+      while (parser.nextToken() != JsonToken.END_OBJECT)
+      {
+        String name = parser.getCurrentName();
+        if("status".equalsIgnoreCase(name)){
+          parser.nextToken();
+          return parser.getText();
+        }
+      }
+    } catch (JsonParseException e) {
+      throw new ManifoldCFException(e);
+    } catch (IOException e) {
+      throw new ManifoldCFException(e);
+    }
+    return null;
+  }
+  
+  private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
+    while(parser.nextToken() != JsonToken.END_ARRAY){
+      String name = parser.getCurrentName();
+      if("message".equalsIgnoreCase(name)){
+        parser.nextToken();
+        return parser.getText();
+      }
+    }
+    return null;
   }
 
   /** Get an output version string, given an output specification.  The output version string is used to uniquely describe the pertinent details of
@@ -200,18 +279,18 @@ public class AmazonCloudSearchConnector 
   *
   * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
   * necessary.
-  *@param spec is the current output specification for the job that is doing the crawling.
+  *@param os is the current output specification for the job that is doing the crawling.
   *@return a string, of unlimited length, which uniquely describes output configuration and specification in such a way that if two such strings are equal,
   * the document will not need to be sent again to the output data store.
   */
   @Override
-  public String getOutputDescription(OutputSpecification spec)
+  public String getOutputDescription(OutputSpecification os)
     throws ManifoldCFException, ServiceInterruption
   {
-    return "";
+    SpecPacker sp = new SpecPacker(os);
+    return sp.toPackedString();
   }
 
-
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
   * unusable documents that will be passed to this output connector.
   *@param outputDescription is the document's output version.
@@ -221,14 +300,33 @@ public class AmazonCloudSearchConnector 
   public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
     throws ManifoldCFException, ServiceInterruption
   {
-    getSession();
-    
-    if(("text/html").equalsIgnoreCase(mimeType)){
-      return super.checkMimeTypeIndexable(outputDescription,mimeType);
-    }
-    return false;
+    SpecPacker sp = new SpecPacker(outputDescription);
+    if (sp.checkMimeType(mimeType))
+      return super.checkMimeTypeIndexable(outputDescription, mimeType);
+    else
+      return false;
   }
 
+  @Override
+  public boolean checkLengthIndexable(String outputDescription, long length)
+    throws ManifoldCFException, ServiceInterruption {
+    SpecPacker sp = new SpecPacker(outputDescription);
+    if (sp.checkLengthIndexable(length))
+      return super.checkLengthIndexable(outputDescription, length);
+    else
+      return false;
+  }
+
+  @Override
+  public boolean checkURLIndexable(String outputDescription, String url)
+    throws ManifoldCFException, ServiceInterruption {
+    SpecPacker sp = new SpecPacker(outputDescription);
+    if (sp.checkURLIndexable(url))
+      return super.checkURLIndexable(outputDescription, url);
+    else
+      return false;
+  }
+  
   /** Add (or replace) a document in the output data store using the connector.
   * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
   * necessary.
@@ -246,71 +344,116 @@ public class AmazonCloudSearchConnector 
   @Override
   public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption
-  {
-    // Establish a session
+  {
+    // Establish a session
     getSession();
+    
+    SpecPacker sp = new SpecPacker(outputDescription);
+    
+    String jsondata = "";
     try {
-      InputStream is = document.getBinaryStream();
-      Parser parser = new HtmlParser();
-      ContentHandler handler = new BodyContentHandler();
-      Metadata metadata = new Metadata();
-      parser.parse(is, handler, metadata, new ParseContext());
-      
       //build json..
       SDFModel model = new SDFModel();
       Document doc = model.new Document();
       doc.setType("add");
-      doc.setId(documentURI);
+      doc.setId(ManifoldCF.hash(documentURI));
+      
+      HashMap fields = new HashMap();
+      Metadata metadata = extractBinaryFile(document, fields);
+      
+      Iterator<String> itr = document.getFields();
+      while(itr.hasNext())
+      {
+        String fName = itr.next();
+        Object[] value = document.getField(fName);
+        String target = sp.getMapping(fName);
+        if(target!=null)
+        {
+          fields.put(target, value);
+        }
+        else
+        {
+          if(sp.keepAllMetadata())
+          {
+            fields.put(fName, value);
+          }
+        }
+      }
       
-      //set body text.
-      Map<String,Object> fields = new HashMap<String,Object>();
-      String bodyStr = handler.toString();
-      if(bodyStr != null){
-        bodyStr = handler.toString().replaceAll("\\n", "").replaceAll("\\t", "");
-        fields.put("body", bodyStr);
-      }
-      
-      //mapping metadata to SDF fields.
-      String contenttype = metadata.get("Content-Style-Type");
-      String title = metadata.get("dc:title");
-      String size = metadata.get("Content-Length");
-      String description = metadata.get("description");
-      String keywords = metadata.get("keywords");
-      if(contenttype != null && !"".equals(contenttype)) fields.put("content_type", contenttype);
-      if(title != null && !"".equals(title)) fields.put("title", title);
-      if(size != null && !"".equals(size)) fields.put("size", size);
-      if(description != null && !"".equals(description)) fields.put("description", description);
-      if(keywords != null && !"".equals(keywords))
-      {
-        List<String> keywordList = new ArrayList<String>();
-        for(String tmp : keywords.split(",")){
-          keywordList.add(tmp);
+      //metadata of binary files.
+      String[] metaNames = metadata.names();
+      for(String mName : metaNames){
+        String value = metadata.get(mName);
+        String target = sp.getMapping(mName);
+        if(target!=null)
+        {
+          fields.put(target, value);
+        }
+        else
+        {
+          if(sp.keepAllMetadata())
+          {
+            fields.put(mName, value);
+          }
         }
-        fields.put("keywords", keywordList);
       }
       doc.setFields(fields);
-      model.addDocument(doc);
-      
-      //generate json data.
-      String jsondata = model.toJSON();
-      
-      //post data..
-      String responsbody = postData(jsondata);
-            
-      activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",null);
-      return DOCUMENTSTATUS_ACCEPTED;
-      
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (SAXException e) {
-      e.printStackTrace();
-    } catch (TikaException e) {
-      e.printStackTrace();
-    }
-    return DOCUMENTSTATUS_REJECTED;
+      model.addDocument(doc);
+      
+      //generate json data.
+      jsondata = model.toJSON();
+    } 
+    catch (SAXException e) {
+      // if document data could not be converted to JSON by jackson.
+      Logging.connectors.debug(e);
+      throw new ManifoldCFException(e);
+    } catch (JsonProcessingException e) {
+      // if document data could not be converted to JSON by jackson.
+      Logging.connectors.debug(e);
+      throw new ManifoldCFException(e);
+    } catch (TikaException e) {
+      // if document could not be parsed by tika.
+      Logging.connectors.debug(e);
+      return DOCUMENTSTATUS_REJECTED;
+    } catch (IOException e) {
+      // if document data could not be read when the document parsing by tika.
+      Logging.connectors.debug(e);
+      throw new ManifoldCFException(e);
+    }
+    
+    //post data..
+    String responsbody = postData(jsondata);
+    
+    // check status
+    String status = getStatusFromJsonResponse(responsbody);
+    if("success".equals(status))
+    {
+      activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",null);
+      return DOCUMENTSTATUS_ACCEPTED;
+    }
+    else {
+      throw new ManifoldCFException("recieved error status from service after feeding document. response body : " + responsbody);
+    }
   }
 
-  /** Remove a document using the connector.
+  private Metadata extractBinaryFile(RepositoryDocument document, HashMap fields)
+      throws IOException, SAXException, TikaException {
+    
+    //extract body text and metadata fields from binary file.
+    InputStream is = document.getBinaryStream();
+    Parser parser = new AutoDetectParser();
+    ContentHandler handler = new BodyContentHandler();
+    Metadata metadata = new Metadata();
+    parser.parse(is, handler, metadata, new ParseContext());
+    String bodyStr = handler.toString();
+    if(bodyStr != null){
+      bodyStr = handler.toString().replaceAll("\\n", "").replaceAll("\\t", "");
+      fields.put(FILE_BODY_TEXT_FIELDNAME, bodyStr);
+    }
+    return metadata;
+  }
+
+  /** Remove a document using the connector.
   * Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
   *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
   * and serve the document.  This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
@@ -334,33 +477,650 @@ public class AmazonCloudSearchConnector 
       jsonData = model.toJSON();
     } catch (JsonProcessingException e) {
       throw new ManifoldCFException(e);
-    }
-    String responsbody = postData(jsonData);
+    }
+    String responsbody = postData(jsonData);
+    
+    // check status
+    String status = getStatusFromJsonResponse(responsbody);
+    if("success".equals(status))
+    {
+      activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
+    }
+    else {
+      throw new ManifoldCFException("recieved error status from service after feeding document.");
+    }
+  }
+
+  /**
+   * Fill in a Server tab configuration parameter map for calling a Velocity
+   * template.
+   *
+   * @param newMap is the map to fill in
+   * @param parameters is the current set of configuration parameters
+   */
+  private static void fillInServerConfigurationMap(Map<String, Object> newMap, IPasswordMapperActivity mapper, ConfigParams parameters) {
+    String serverhost = parameters.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
+    String serverpath = parameters.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
+    String proxyprotocol = parameters.getParameter(AmazonCloudSearchConfig.PROXY_PROTOCOL);
+    String proxyhost = parameters.getParameter(AmazonCloudSearchConfig.PROXY_HOST);
+    String proxyport = parameters.getParameter(AmazonCloudSearchConfig.PROXY_PORT);
+
+    if (serverhost == null)
+      serverhost = AmazonCloudSearchConfig.SERVER_HOST_DEFAULT;
+    if (serverpath == null)
+      serverpath = AmazonCloudSearchConfig.SERVER_PATH_DEFAULT;
+    if (proxyprotocol == null)
+      proxyprotocol = AmazonCloudSearchConfig.PROXY_PROTOCOL_DEFAULT;
+    if (proxyhost == null)
+      proxyhost = AmazonCloudSearchConfig.PROXY_HOST_DEFAULT;
+    if (proxyport == null)
+      proxyport = AmazonCloudSearchConfig.PROXY_PORT_DEFAULT;
+
+    newMap.put("SERVERHOST", serverhost);
+    newMap.put("SERVERPATH", serverpath);
+    newMap.put("PROXYPROTOCOL", proxyprotocol);
+    newMap.put("PROXYHOST", proxyhost);
+    newMap.put("PROXYPORT", proxyport);
+  }
+
+  /**
+   * View configuration. This method is called in the body section of the
+   * connector's view configuration page. Its purpose is to present the
+   * connection information to the user. The coder can presume that the HTML
+   * that is output from this configuration will be within appropriate <html>
+   * and <body> tags.
+   *
+   * @param threadContext is the local thread context.
+   * @param out is the output to which any HTML should be sent.
+   * @param parameters are the configuration parameters, as they currently
+   * exist, for this connection being configured.
+   */
+  @Override
+  public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out,
+      Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+
+    // Fill in map from each tab
+    fillInServerConfigurationMap(paramMap, out, parameters);
+
+    Messages.outputResourceWithVelocity(out,locale,VIEW_CONFIGURATION_HTML,paramMap);
+  }
+
+  /**
+   *
+   * Output the configuration header section. This method is called in the
+   * head section of the connector's configuration page. Its purpose is to add
+   * the required tabs to the list, and to output any javascript methods that
+   * might be needed by the configuration editing HTML.
+   *
+   * @param threadContext is the local thread context.
+   * @param out is the output to which any HTML should be sent.
+   * @param parameters are the configuration parameters, as they currently
+   * exist, for this connection being configured.
+   * @param tabsArray is an array of tab names. Add to this array any tab
+   * names that are specific to the connector.
+   */
+  @Override
+  public void outputConfigurationHeader(IThreadContext threadContext,
+      IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray)
+      throws ManifoldCFException, IOException {
+    // Add the Server tab
+    tabsArray.add(Messages.getString(locale, "AmazonCloudSearchOutputConnector.ServerTabName"));
+    // Map the parameters
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+
+    // Fill in the parameters from each tab
+    fillInServerConfigurationMap(paramMap, out, parameters);
+        
+    // Output the Javascript - only one Velocity template for all tabs
+    Messages.outputResourceWithVelocity(out,locale,EDIT_CONFIGURATION_JS,paramMap);
+  }
+
+  @Override
+  public void outputConfigurationBody(IThreadContext threadContext,
+      IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName)
+      throws ManifoldCFException, IOException {
+    
+    // Call the Velocity templates for each tab
+    Map<String, Object> paramMap = new HashMap<String, Object>();
     
+    // Set the tab name
+    paramMap.put("TABNAME", tabName);
     
-    activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
+    // Fill in the parameters
+    fillInServerConfigurationMap(paramMap, out, parameters);
+    
+    // Server tab
+    Messages.outputResourceWithVelocity(out,locale,EDIT_CONFIGURATION_HTML,paramMap);
   }
 
-  private String postData(String jsonData) throws ManifoldCFException {
-    CloseableHttpClient httpclient = HttpClients.createDefault();
-    try {
-      poster.setEntity(new StringEntity(jsonData, Consts.UTF_8));
+  /**
+   * Process a configuration post. This method is called at the start of the
+   * connector's configuration page, whenever there is a possibility that form
+   * data for a connection has been posted. Its purpose is to gather form
+   * information and modify the configuration parameters accordingly. The name
+   * of the posted form is "editconnection".
+   *
+   * @param threadContext is the local thread context.
+   * @param variableContext is the set of variables available from the post,
+   * including binary file post information.
+   * @param parameters are the configuration parameters, as they currently
+   * exist, for this connection being configured.
+   * @return null if all is well, or a string error message if there is an
+   * error that should prevent saving of the connection (and cause a
+   * redirection to an error page).
+   *
+   */
+  @Override
+  public String processConfigurationPost(IThreadContext threadContext,
+    IPostParameters variableContext, ConfigParams parameters)
+    throws ManifoldCFException {
+
+    // Server tab parameters
+    String serverhost = variableContext.getParameter("serverhost");
+    if (serverhost != null)
+      parameters.setParameter(AmazonCloudSearchConfig.SERVER_HOST, serverhost);
+    String serverpath = variableContext.getParameter("serverpath");
+    if (serverpath != null)
+      parameters.setParameter(AmazonCloudSearchConfig.SERVER_PATH, serverpath);
+    String proxyprotocol = variableContext.getParameter("proxyprotocol");
+    if (proxyprotocol != null)
+      parameters.setParameter(AmazonCloudSearchConfig.PROXY_PROTOCOL, proxyprotocol);
+    String proxyhost = variableContext.getParameter("proxyhost");
+    if (proxyhost != null)
+      parameters.setParameter(AmazonCloudSearchConfig.PROXY_HOST, proxyhost);
+    String proxyport = variableContext.getParameter("proxyport");
+    if (proxyport != null)
+      parameters.setParameter(AmazonCloudSearchConfig.PROXY_PORT, proxyport);
+
+    return null;
+  }
+
+  private String postData(String jsonData) throws ServiceInterruption, ManifoldCFException {
+    CloseableHttpClient httpclient = HttpClients.createDefault();
+    try {
+      poster.setEntity(new StringEntity(jsonData, Consts.UTF_8));
       HttpResponse res = httpclient.execute(poster);
+      
+      HttpEntity resEntity = res.getEntity();
+      return EntityUtils.toString(resEntity);
+    } catch (ClientProtocolException e) {
+      throw new ManifoldCFException(e);
+    } catch (IOException e) {
+      handleIOException(e);
+    } finally {
+      try {
+        httpclient.close();
+      } catch (IOException e) {
+        //do nothing
+      }
+    }
+    return null;
+  }
+  
+  private static void handleIOException(IOException e)
+      throws ManifoldCFException, ServiceInterruption {
+    if (!(e instanceof java.net.SocketTimeoutException)
+        && (e instanceof InterruptedIOException)) {
+      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+          ManifoldCFException.INTERRUPTED);
+    }
+    Logging.connectors.warn(
+        "Amazon CloudSearch: IO exception: " + e.getMessage(), e);
+    long currentTime = System.currentTimeMillis();
+    throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
+        currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
+  }
+  
+  protected static void fillInFieldMappingSpecificationMap(Map<String,Object> paramMap, OutputSpecification os)
+  {
+    // Prep for field mappings
+    List<Map<String,String>> fieldMappings = new ArrayList<Map<String,String>>();
+    String keepAllMetadataValue = "true";
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
+      SpecificationNode sn = os.getChild(i);
+      if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
+        String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
+        String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
+        String targetDisplay;
+        if (target == null)
+        {
+          target = "";
+          targetDisplay = "(remove)";
+        }
+        else
+          targetDisplay = target;
+        Map<String,String> fieldMapping = new HashMap<String,String>();
+        fieldMapping.put("SOURCE",source);
+        fieldMapping.put("TARGET",target);
+        fieldMapping.put("TARGETDISPLAY",targetDisplay);
+        fieldMappings.add(fieldMapping);
+      }
+      else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA))
+      {
+        keepAllMetadataValue = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+      }
+    }
+    paramMap.put("FIELDMAPPINGS",fieldMappings);
+    paramMap.put("KEEPALLMETADATA",keepAllMetadataValue);
+  }
+  
+  protected static void fillInContentsSpecificationMap(Map<String,Object> paramMap, OutputSpecification os)
+  {
+    String maxFileSize = AmazonCloudSearchConfig.MAXLENGTH_DEFAULT;
+    String allowedMimeTypes = AmazonCloudSearchConfig.MIMETYPES_DEFAULT;
+    String allowedFileExtensions = AmazonCloudSearchConfig.EXTENSIONS_DEFAULT;
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
+      SpecificationNode sn = os.getChild(i);
+      if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH))
+        maxFileSize = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+      else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES))
+        allowedMimeTypes = sn.getValue();
+      else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS))
+        allowedFileExtensions = sn.getValue();
+    }
+    paramMap.put("MAXFILESIZE",maxFileSize);
+    paramMap.put("MIMETYPES",allowedMimeTypes);
+    paramMap.put("EXTENSIONS",allowedFileExtensions);
+  }
+  
+  /**
+   * Output the specification header section. This method is called in the head
+   * section of a job page which has selected an output connection of the
+   * current type. Its purpose is to add the required tabs to the list, and to
+   * output any javascript methods that might be needed by the job editing HTML.
+   * 
+   * @param out is the output to which any HTML should be sent.
+   * @param os is the current output specification for this job.
+   * @param tabsArray is an array of tab names. Add to this array any tab names
+   *        that are specific to the connector.
+   */
+  @Override
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale,
+      OutputSpecification os, List<String> tabsArray)
+      throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+
+    tabsArray.add(Messages.getString(locale, "AmazonCloudSearchOutputConnector.FieldMappingTabName"));
+    tabsArray.add(Messages.getString(locale, "AmazonCloudSearchOutputConnector.ContentsTabName"));
+
+    // Fill in the specification header map, using data from all tabs.
+    fillInFieldMappingSpecificationMap(paramMap, os);
+    fillInContentsSpecificationMap(paramMap, os);
+
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_JS,paramMap);
+  }
+  
+  /** Output the specification body section.
+  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param os is the current output specification for this job.
+  *@param tabName is the current tab name.
+  */
+  @Override
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName)
+    throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+
+    // Set the tab name
+    paramMap.put("TABNAME", tabName);
+
+    // Fill in the field mapping tab data
+    fillInFieldMappingSpecificationMap(paramMap, os);
+    fillInContentsSpecificationMap(paramMap, os);
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_CONTENTS_HTML,paramMap);
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_FIELDMAPPING_HTML,paramMap);
+  }
+
+  /** Process a specification post.
+  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param os is the current output specification for this job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
+  @Override
+  public String processSpecificationPost(IPostParameters variableContext,
+    Locale locale, OutputSpecification os) throws ManifoldCFException {
+    String x;
+        
+    x = variableContext.getParameter("maxfilesize");
+    if (x != null)
+    {
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_MAXLENGTH);
+      sn.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE,x);
+      os.addChild(os.getChildCount(),sn);
+    }
+
+    x = variableContext.getParameter("mimetypes");
+    if (x != null)
+    {
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_MIMETYPES);
+      sn.setValue(x);
+      os.addChild(os.getChildCount(),sn);
+    }
+
+    x = variableContext.getParameter("extensions");
+    if (x != null)
+    {
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_EXTENSIONS);
+      sn.setValue(x);
+      os.addChild(os.getChildCount(),sn);
+    }
+    
+    x = variableContext.getParameter("cloudsearch_fieldmapping_count");
+    if (x != null && x.length() > 0)
+    {
+      // About to gather the fieldmapping nodes, so get rid of the old ones.
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP) || node.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      int count = Integer.parseInt(x);
+      i = 0;
+      while (i < count)
+      {
+        String prefix = "cloudsearch_fieldmapping_";
+        String suffix = "_"+Integer.toString(i);
+        String op = variableContext.getParameter(prefix+"op"+suffix);
+        if (op == null || !op.equals("Delete"))
+        {
+          // Gather the fieldmap etc.
+          String source = variableContext.getParameter(prefix+"source"+suffix);
+          String target = variableContext.getParameter(prefix+"target"+suffix);
+          if (target == null)
+            target = "";
+          SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_FIELDMAP);
+          node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE,source);
+          node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_TARGET,target);
+          os.addChild(os.getChildCount(),node);
+        }
+        i++;
+      }
       
-      HttpEntity resEntity = res.getEntity();
-      return EntityUtils.toString(resEntity);
+      String addop = variableContext.getParameter("cloudsearch_fieldmapping_op");
+      if (addop != null && addop.equals("Add"))
+      {
+        String source = variableContext.getParameter("cloudsearch_fieldmapping_source");
+        String target = variableContext.getParameter("cloudsearch_fieldmapping_target");
+        if (target == null)
+          target = "";
+        SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_FIELDMAP);
+        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE,source);
+        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_TARGET,target);
+        os.addChild(os.getChildCount(),node);
+      }
       
-    } catch (ClientProtocolException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e) {
-      throw new ManifoldCFException(e);
-    } finally {
-      try {
-        httpclient.close();
-      } catch (IOException e) {
-        //do nothing
+      // Gather the keep all metadata parameter to be the last one
+      SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_KEEPMETADATA);
+      String keepAll = variableContext.getParameter("cloudsearch_keepallmetadata");
+      if (keepAll != null)
+      {
+        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE, keepAll);
+      }
+      else
+      {
+        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE, "false");
+      }
+      // Add the new keepallmetadata config parameter 
+      os.addChild(os.getChildCount(), node);
+    }
+    
+    return null;
+  }
+  
+
+  /** View specification.
+  * Called in the body section of a job's view page to present the output specification to the user.
+  * The HTML written here can be presumed to sit inside appropriate <html> and <body> tags.
+  * The values gathered from the field-mapping and contents tabs are rendered through the
+  * VIEW_SPECIFICATION_HTML Velocity resource.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the locale passed on to the resource renderer.
+  *@param os is the current output specification for this job.
+  */
+  @Override
+  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os)
+    throws ManifoldCFException, IOException
+  {
+    // One shared context, populated by every tab in turn
+    final Map<String, Object> velocityContext = new HashMap<String, Object>();
+    fillInFieldMappingSpecificationMap(velocityContext, os);
+    fillInContentsSpecificationMap(velocityContext, os);
+
+    // Render the read-only view from the gathered values
+    Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, velocityContext);
+  }
+  
+  /** Split a multi-line string into lines and add every trimmed, non-empty line to a set.
+  * A null input adds nothing; without this guard the StringReader constructor would
+  * throw a NullPointerException.
+  *@param set is the set that receives the lines.
+  *@param input is the text to split, one entry per line; may be null.
+  */
+  protected static void fillSet(Set<String> set, String input) {
+    // Nothing to read when no value was supplied
+    if (input == null)
+      return;
+    try
+    {
+      StringReader sr = new StringReader(input);
+      BufferedReader br = new BufferedReader(sr);
+      String line = null;
+      while ((line = br.readLine()) != null)
+      {
+        line = line.trim();
+        if (line.length() > 0)
+          set.add(line);
       }
     }
+    catch (IOException e)
+    {
+      // Should never happen
+      throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
+    }
   }
   
-}
\ No newline at end of file
+  /** Holds an output specification in unpacked form and converts it to and from
+  * a single packed string.
+  * It carries the source-to-target field mappings, the keep-all-metadata flag,
+  * an optional maximum length, and the sets of accepted mime types and extensions.
+  */
+  protected static class SpecPacker {
+    
+    private final Map<String,String> sourceTargets = new HashMap<String,String>();
+    private final boolean keepAllMetadata;
+    private final Set<String> extensions = new HashSet<String>();
+    private final Set<String> mimeTypes = new HashSet<String>();
+    private final Long lengthCutoff;
+    
+    /** Build from an output specification by scanning its child nodes.
+    * When no keep-metadata node is present the flag defaults to true; when no
+    * max-length node is present there is no length cutoff; when the mime-type or
+    * extension node is absent the corresponding set stays empty.
+    *@param os is the output specification to read.
+    */
+    public SpecPacker(OutputSpecification os) {
+      boolean keepAllMetadata = true;
+      Long lengthCutoff = null;
+      String extensions = null;
+      String mimeTypes = null;
+      for (int i = 0; i < os.getChildCount(); i++) {
+        SpecificationNode sn = os.getChild(i);
+        String nodeType = sn.getType();
+        
+        if (nodeType.equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA)) {
+          String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+          keepAllMetadata = Boolean.parseBoolean(value);
+        } else if (nodeType.equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
+          String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
+          String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
+          
+          // A mapping without a source would make the key sort in toPackedString() fail; skip it.
+          if (source == null) {
+            continue;
+          }
+          if (target == null) {
+            target = "";
+          }
+          sourceTargets.put(source, target);
+        } else if (nodeType.equals(AmazonCloudSearchConfig.NODE_MIMETYPES)) {
+          mimeTypes = sn.getValue();
+        } else if (nodeType.equals(AmazonCloudSearchConfig.NODE_EXTENSIONS)) {
+          extensions = sn.getValue();
+        } else if (nodeType.equals(AmazonCloudSearchConfig.NODE_MAXLENGTH)) {
+          String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+          lengthCutoff = Long.valueOf(value);
+        }
+      }
+      this.keepAllMetadata = keepAllMetadata;
+      this.lengthCutoff = lengthCutoff;
+      // The node may be missing altogether, in which case there is no text to split.
+      if (extensions != null)
+        fillSet(this.extensions, extensions);
+      if (mimeTypes != null)
+        fillSet(this.mimeTypes, mimeTypes);
+    }
+    
+    /** Build from a packed string, reading the sections in the order that
+    * toPackedString() writes them: mappings, keep-all-metadata flag, max length,
+    * mime types, extensions.
+    *@param packedString is the packed form to decode.
+    */
+    public SpecPacker(String packedString) {
+      
+      int index = 0;
+      
+      // Mappings
+      final List<String> packedMappings = new ArrayList<String>();
+      index = unpackList(packedMappings,packedString,index,'+');
+      String[] fixedList = new String[2];
+      for (String packedMapping : packedMappings) {
+        unpackFixedList(fixedList,packedMapping,0,':');
+        sourceTargets.put(fixedList[0], fixedList[1]);
+      }
+      
+      // Keep all metadata ('+' means true; a string that ends here defaults to true)
+      if (packedString.length() > index)
+        keepAllMetadata = (packedString.charAt(index++) == '+');
+      else
+        keepAllMetadata = true;
+      
+      // Max length ('+' is followed by the packed value; anything else means no cutoff)
+      final StringBuilder sb = new StringBuilder();
+      if (packedString.length() > index) {
+        if (packedString.charAt(index++) == '+') {
+          index = unpack(sb,packedString,index,'+');
+          this.lengthCutoff = Long.valueOf(sb.toString());
+        } else
+          this.lengthCutoff = null;
+      } else
+        this.lengthCutoff = null;
+      
+      // Mime types
+      final List<String> mimeBuffer = new ArrayList<String>();
+      index = unpackList(mimeBuffer,packedString,index,'+');
+      this.mimeTypes.addAll(mimeBuffer);
+      
+      // Extensions
+      final List<String> extensionsBuffer = new ArrayList<String>();
+      index = unpackList(extensionsBuffer,packedString,index,'+');
+      this.extensions.addAll(extensionsBuffer);
+    }
+    
+    /** Encode this specification as a packed string.
+    * Mapping sources, mime types and extensions are sorted before packing, so
+    * the result does not depend on hash iteration order.
+    *@return the packed string.
+    */
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+      
+      // Mappings, in sorted source order
+      final String[] sortArray = sourceTargets.keySet().toArray(new String[sourceTargets.size()]);
+      java.util.Arrays.sort(sortArray);
+      
+      List<String> packedMappings = new ArrayList<String>();
+      String[] fixedList = new String[2];
+      for (String source : sortArray) {
+        String target = sourceTargets.get(source);
+        StringBuilder localBuffer = new StringBuilder();
+        fixedList[0] = source;
+        fixedList[1] = target;
+        packFixedList(localBuffer,fixedList,':');
+        packedMappings.add(localBuffer.toString());
+      }
+      packList(sb,packedMappings,'+');
+
+      // Keep all metadata
+      if (keepAllMetadata)
+        sb.append('+');
+      else
+        sb.append('-');
+      
+      // Max length
+      if (lengthCutoff == null)
+        sb.append('-');
+      else {
+        sb.append('+');
+        pack(sb,lengthCutoff.toString(),'+');
+      }
+      
+      // Mime types
+      String[] sortedMimeTypes = this.mimeTypes.toArray(new String[this.mimeTypes.size()]);
+      java.util.Arrays.sort(sortedMimeTypes);
+      packList(sb,sortedMimeTypes,'+');
+      
+      // Extensions
+      String[] sortedExtensions = this.extensions.toArray(new String[this.extensions.size()]);
+      java.util.Arrays.sort(sortedExtensions);
+      packList(sb,sortedExtensions,'+');
+      
+      return sb.toString();
+    }
+    
+    /** Check a length against the configured cutoff.
+    *@param length is the length to check.
+    *@return true if there is no cutoff or the length does not exceed it.
+    */
+    public boolean checkLengthIndexable(long length) {
+      if (lengthCutoff == null)
+        return true;
+      return (length <= lengthCutoff.longValue());
+    }
+    
+    /** Check whether a mime type is in the accepted set.
+    *@param mimeType is the mime type; null is looked up as "application/unknown".
+    *@return true if the mime type is accepted.
+    */
+    public boolean checkMimeType(String mimeType) {
+      if (mimeType == null)
+        mimeType = "application/unknown";
+      return mimeTypes.contains(mimeType);
+    }
+    
+    /** Check whether the extension of a URL is in the accepted set.
+    *@param url is the URL whose extension is examined; a missing or empty extension is looked up as ".".
+    *@return true if the extension is accepted.
+    */
+    public boolean checkURLIndexable(String url) {
+      String extension = FilenameUtils.getExtension(url);
+      if (extension == null || extension.length() == 0)
+        extension = ".";
+      return extensions.contains(extension);
+    }
+    
+    /** Look up the target field for a source field.
+    *@param source is the source field name.
+    *@return the mapped target, or null if there is no mapping for the source.
+    */
+    public String getMapping(String source) {
+      return sourceTargets.get(source);
+    }
+    
+    /** Get the keep-all-metadata flag.
+    *@return true if all metadata should be kept.
+    */
+    public boolean keepAllMetadata() {
+      return keepAllMetadata;
+    }
+  }
+  
+}

Modified: manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/pom.xml?rev=1596720&r1=1585531&r2=1596720&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/pom.xml (original)
+++ manifoldcf/branches/CONNECTORS-916-rebased/connectors/amazoncloudsearch/pom.xml Thu May 22 00:57:29 2014
@@ -227,14 +227,8 @@
 	  <groupId>org.apache.tika</groupId>
 	  <artifactId>tika-parsers</artifactId>
 	  <version>1.5</version>
-    </dependency>      
-    <dependency>
-      <groupId>org.ccil.cowan.tagsoup</groupId>
-      <artifactId>tagsoup</artifactId>
-      <version>1.2.1</version>
     </dependency>
     
-    
     <!-- Testing dependencies -->
     
     <dependency>

Modified: manifoldcf/branches/CONNECTORS-916-rebased/connectors/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916-rebased/connectors/pom.xml?rev=1596720&r1=1596719&r2=1596720&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916-rebased/connectors/pom.xml (original)
+++ manifoldcf/branches/CONNECTORS-916-rebased/connectors/pom.xml Thu May 22 00:57:29 2014
@@ -57,6 +57,7 @@
     <module>generic</module>
     <module>regexpmapper</module>
     <module>email</module>
+    <module>amazoncloudsearch</module>
   </modules>
 
 </project>

Modified: manifoldcf/branches/CONNECTORS-916-rebased/lib-license/LICENSE.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-916-rebased/lib-license/LICENSE.txt?rev=1596720&r1=1596719&r2=1596720&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-916-rebased/lib-license/LICENSE.txt (original)
+++ manifoldcf/branches/CONNECTORS-916-rebased/lib-license/LICENSE.txt Thu May 22 00:57:29 2014
@@ -178,6 +178,7 @@
 
 Includes software from other Apache Software Foundation projects,
 including, but not limited to:
+  - Apache Tika
   - Apache Tomcat
   - Apache Commons
   - Apache Geronimo
@@ -299,7 +300,7 @@ License: MIT license (http://opensource.
 This product includes a json-simple-1.1.jar.
 License: Apache 2 (http://www.apache.org/licenses/LICENSE-2.0.txt)
 
-This product includes a jackson-core-2.1.3.jar.
+This product includes a jackson-core-2.1.3.jar, jackson-databind-2.1.3.jar, and jackson-annotations-2.1.2.jar.
 License: Dual license; we choose to distribute under Apache 2 (http://www.apache.org/licenses/LICENSE-2.0.txt)
 
 This product includes a google-api-client-1.14.1-beta.jar.
@@ -323,6 +324,9 @@ License: Apache 2 (http://www.apache.org
 This product includes a guava.jar.
 License: Apache 2  (http://www.apache.org/licenses/LICENSE-2.0.txt)
 
+This product includes a tagsoup.jar.
+License: Apache 2 (http://home.ccil.org/~cowan/XML/tagsoup/)
+
 This product may include pdf files that embed IPA-licensed fonts.
 License: IPA Font License Agreement v1.0 (http://ossipedia.ipa.go.jp/ipafont/index.html#LicenseEng)
 



Mime
View raw message