manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mlizew...@apache.org
Subject svn commit: r1496848 - in /manifoldcf/trunk: ./ CHANGES.txt build.xml connectors/generic/ connectors/pom.xml pom.xml site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
Date Wed, 26 Jun 2013 09:11:42 GMT
Author: mlizewski
Date: Wed Jun 26 09:11:41 2013
New Revision: 1496848

URL: http://svn.apache.org/r1496848
Log:
merged with CONNECTORS-727 branch which implements Generic API connector

Added:
    manifoldcf/trunk/connectors/generic/   (props changed)
      - copied from r1496843, manifoldcf/branches/CONNECTORS-727/connectors/generic/
Modified:
    manifoldcf/trunk/   (props changed)
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/build.xml
    manifoldcf/trunk/connectors/pom.xml
    manifoldcf/trunk/pom.xml
    manifoldcf/trunk/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml

Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-727:r1495363-1496843

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1496848&r1=1496847&r2=1496848&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Wed Jun 26 09:11:41 2013
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 1.3-dev =====================
 
+CONNECTORS-727: Implemented generic API connector
+(Maciej Lizewski, Karl Wright)
+
 CONNECTORS-734: Catch deadlock error with EXPLAIN ANALYZE in
 postgresql, and ignore it.
 (Ahmet Arslan, Karl Wright)

Modified: manifoldcf/trunk/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/build.xml?rev=1496848&r1=1496847&r2=1496848&view=diff
==============================================================================
--- manifoldcf/trunk/build.xml (original)
+++ manifoldcf/trunk/build.xml Wed Jun 26 09:11:41 2013
@@ -59,6 +59,7 @@
         <ant dir="connectors/alfresco" target="clean"/>
         <ant dir="connectors/cmis" target="clean"/>
         <ant dir="connectors/dropbox" target="clean"/>
+        <ant dir="connectors/generic" target="clean"/>
         <ant dir="connectors/googledrive" target="clean"/>
 		<ant dir="connectors/jira" target="clean"/>
         <ant dir="connectors/activedirectory" target="clean"/>
@@ -112,6 +113,7 @@
         <ant dir="connectors/alfresco" target="clean"/>
         <ant dir="connectors/cmis" target="clean"/>
         <ant dir="connectors/dropbox" target="clean"/>
+        <ant dir="connectors/generic" target="clean"/>
         <ant dir="connectors/googledrive" target="clean"/>
 		<ant dir="connectors/jira" target="clean"/>
         <ant dir="connectors/activedirectory" target="clean"/>
@@ -286,6 +288,8 @@
     
     <target name="setup-dropbox-connector" depends="build-framework" if="downloaded"/>
 
+    <target name="setup-generic-connector" depends="build-framework" if="downloaded"/>
+
     <target name="setup-googledrive-connector" depends="build-framework" if="downloaded"/>
 	<target name="setup-jira-connector" depends="build-framework" if="downloaded"/>
     
@@ -309,6 +313,9 @@
         <ant dir="connectors/dropbox" target="build"/>
     </target>
 
+    <target name="build-generic-connector" depends="setup-generic-connector" if="downloaded">
+        <ant dir="connectors/generic" target="build"/>
+    </target>
 
     <target name="build-googledrive-connector" depends="setup-googledrive-connector" if="downloaded">
         <ant dir="connectors/googledrive" target="build"/>
@@ -323,6 +330,10 @@
         <ant dir="connectors/dropbox" target="doc"/>
     </target>
 
+    <target name="doc-generic-connector" depends="setup-generic-connector" if="downloaded">
+        <ant dir="connectors/generic" target="doc"/>
+    </target>
+
     <target name="doc-googledrive-connector" depends="setup-googledrive-connector" if="downloaded">
         <ant dir="connectors/googledrive" target="doc"/>
     </target>
@@ -1420,7 +1431,26 @@
         </condition>
     </target>
 
+    <target name="calculate-generic-condition" depends="build-generic-connector">
+        <available file="connectors/generic/dist/lib" type="dir" property="generic.exists"/>
+        <condition property="generic.include">
+            <and>
+                <isset property="generic.exists"/>
+                <isset property="downloaded"/>
+            </and>
+        </condition>
+    </target>
 
+    <target name="calculate-generic-doc-condition" depends="doc-generic-connector">
+        <available file="connectors/generic/dist/doc" type="dir" property="generic-doc.exists"/>
+        <condition property="generic-doc.include">
+            <and>
+                <isset property="generic-doc.exists"/>
+                <isset property="downloaded"/>
+            </and>
+        </condition>
+    </target>
+	
     <target name="calculate-googledrive-condition" depends="build-googledrive-connector">
         <available file="connectors/googledrive/dist/lib" type="dir" property="googledrive.exists"/>
         <condition property="googledrive.include">
@@ -1506,6 +1536,23 @@
         </antcall>
     </target>
 
+    <target name="deliver-generic-connector" depends="calculate-generic-condition" if="generic.include">
+        <antcall target="general-connector-delivery">
+            <param name="connector-name" value="generic"/>
+        </antcall>
+        <antcall target="general-add-repository-connector">
+            <param name="connector-name" value="generic"/>
+            <param name="connector-label" value="generic"/>
+            <param name="connector-class" value="org.apache.manifoldcf.crawler.connectors.generic.GenericRepositoryConnector"/>
+        </antcall>
+    </target>
+    
+    <target name="deliver-generic-connector-doc" depends="calculate-generic-doc-condition"
if="generic-doc.include">
+        <antcall target="general-connector-doc-delivery">
+            <param name="connector-name" value="generic"/>
+        </antcall>
+    </target>
+
     <target name="deliver-googledrive-connector" depends="calculate-googledrive-condition"
if="googledrive.include">
         <antcall target="general-connector-delivery">
             <param name="connector-name" value="googledrive"/>
@@ -2698,8 +2745,8 @@
     <target name="end-to-end-loadtests-HSQLDB" depends="run-filesystem-loadtests-HSQLDB,run-rss-loadtests-HSQLDB,run-wiki-loadtests-HSQLDB,run-alfresco-loadtests-HSQLDB,run-cmis-loadtests-HSQLDB,run-sharepoint-loadtests-HSQLDB"/>
 
 
-    <target name="deliver-open-connectors" depends="deliver-jira-connector,deliver-googledrive-connector,deliver-dropbox-connector,deliver-nullauthority-connector,deliver-activedirectory-connector,deliver-ldap-connector,deliver-alfresco-connector,deliver-cmis-connector,deliver-filesystem-connector,deliver-rss-connector,deliver-webcrawler-connector,deliver-wiki-connector,deliver-jdbc-connector"/>
-    <target name="deliver-open-connectors-doc" depends="deliver-googledrive-connector-doc,deliver-jira-connector-doc,deliver-dropbox-connector-doc,deliver-nullauthority-connector-doc,deliver-activedirectory-connector-doc,deliver-ldap-connector-doc,deliver-alfresco-connector-doc,deliver-cmis-connector-doc,deliver-filesystem-connector-doc,deliver-rss-connector-doc,deliver-webcrawler-connector-doc,deliver-wiki-connector-doc,deliver-jdbc-connector-doc"/>
+    <target name="deliver-open-connectors" depends="deliver-generic-connector,deliver-jira-connector,deliver-googledrive-connector,deliver-dropbox-connector,deliver-nullauthority-connector,deliver-activedirectory-connector,deliver-ldap-connector,deliver-alfresco-connector,deliver-cmis-connector,deliver-filesystem-connector,deliver-rss-connector,deliver-webcrawler-connector,deliver-wiki-connector,deliver-jdbc-connector"/>
+    <target name="deliver-open-connectors-doc" depends="deliver-generic-connector-doc,deliver-jira-connector-doc,deliver-googledrive-connector-doc,deliver-dropbox-connector-doc,deliver-nullauthority-connector-doc,deliver-activedirectory-connector-doc,deliver-ldap-connector-doc,deliver-alfresco-connector-doc,deliver-cmis-connector-doc,deliver-filesystem-connector-doc,deliver-rss-connector-doc,deliver-webcrawler-connector-doc,deliver-wiki-connector-doc,deliver-jdbc-connector-doc"/>
     
     <target name="deliver-output-connectors" depends="deliver-gts-connector,deliver-solr-connector,deliver-nulloutput-connector,deliver-opensearchserver-connector,deliver-elasticsearch-connector"/>
     <target name="deliver-output-connectors-doc" depends="deliver-gts-connector-doc,deliver-solr-connector-doc,deliver-nulloutput-connector-doc,deliver-opensearchserver-connector-doc,deliver-elasticsearch-connector-doc"/>
@@ -3706,7 +3753,6 @@ Use Apache Forrest version forrest-0.9-d
             <param name="artifact-type" value="jar"/>
         </antcall>
     </target>
-    
 	
 	<target name="download-jira-client">
         <mkdir dir="lib"/>
@@ -3855,6 +3901,7 @@ Use Apache Forrest version forrest-0.9-d
     <target name="make-deps" depends="download-proprietary-dependencies">
         <ant dir="connectors/alfresco" target="download-dependencies"/>
         <ant dir="connectors/cmis" target="download-dependencies"/>
+        <ant dir="connectors/generic" target="download-dependencies"/>
         <ant dir="connectors/dropbox" target="download-dependencies"/>
         <ant dir="connectors/googledrive" target="download-dependencies"/>
 		<ant dir="connectors/jira" target="download-dependencies"/>
@@ -3894,6 +3941,7 @@ Use Apache Forrest version forrest-0.9-d
     <target name="clean-deps" depends="download-proprietary-cleanup">
         <ant dir="connectors/alfresco" target="download-cleanup"/>
         <ant dir="connectors/cmis" target="download-cleanup"/>
+        <ant dir="connectors/generic" target="download-cleanup"/>        
         <ant dir="connectors/dropbox" target="download-cleanup"/>        
         <ant dir="connectors/googledrive" target="download-cleanup"/>
 		<ant dir="connectors/jira" target="download-cleanup"/>

Propchange: manifoldcf/trunk/connectors/generic/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Jun 26 09:11:41 2013
@@ -0,0 +1,3 @@
+target
+build
+dist

Modified: manifoldcf/trunk/connectors/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/pom.xml?rev=1496848&r1=1496847&r2=1496848&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/pom.xml (original)
+++ manifoldcf/trunk/connectors/pom.xml Wed Jun 26 09:11:41 2013
@@ -53,6 +53,7 @@
     <module>dropbox</module>
     <module>googledrive</module>
     <module>jira</module>
+    <module>generic</module>
   </modules>
 
 </project>

Modified: manifoldcf/trunk/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/pom.xml?rev=1496848&r1=1496847&r2=1496848&view=diff
==============================================================================
--- manifoldcf/trunk/pom.xml (original)
+++ manifoldcf/trunk/pom.xml Wed Jun 26 09:11:41 2013
@@ -64,6 +64,7 @@
     <json.version>20090211</json.version>
     <velocity.version>1.7</velocity.version>
     <slf4j.version>1.6.6</slf4j.version>
+	<jaxb.version>2.2.6</jaxb.version>
   </properties>
 
   <modules>

Modified: manifoldcf/trunk/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml?rev=1496848&r1=1496847&r2=1496848&view=diff
==============================================================================
--- manifoldcf/trunk/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
(original)
+++ manifoldcf/trunk/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
Wed Jun 26 09:11:41 2013
@@ -2189,6 +2189,96 @@ curl -XGET http://localhost:9200/index/_
               <br/><br/>
             </section>
             
+            <section id="genericconnector">
+              <title>Generic Connector</title>
+              <p>The Generic connector allows you to index any source that follows the provided
API specification. The idea is that you only need to implement the API, which is designed
+			  to be fine-grained and as simple as possible while still handling document indexing.</p>
+              <p>API should be implemented as xml web page (entry point) returning
results based on provided GET params. It may be a simple server script or part of the bigger
application.
+			  API can be secured with HTTP basic authentication.</p>
+              <br/>
+              <p>There are 4 actions:</p>
+			  <ul>
+				<li>check</li>
+				<li>seed</li>
+				<li>items</li>
+				<li>item</li>
+			  </ul>
+			  <p>Action is passed as "action" GET param to the entrypoint.</p>
+              <br/><br/>
+			  <p><b>[entrypoint]?action=check</b></p>
+			  <p>Should return HTTP status code 200 providing information that entrypoint is
working properly. Any content returned will be ignored, only the status code matters.</p>
+              <br/><br/>
+			  
+			  <p><b>[entrypoint]?action=seed&amp;startDate=YYYY-MM-DDTHH:mm:ssZ&amp;endDate=YYYY-MM-DDTHH:mm:ssZ</b></p>
+			  <p>Parameters:</p>
+			  <ul>
+				<li>startDate - the start of time frame which should be applied to returned seeds.
If this is a first run - this parameter will not be provided meaning that all documents should
be returned.</li>
+				<li>endDate - the end of time frame. Always provided.</li>
+			  </ul>
+			  <p><code>startDate</code> and <code>endDate</code> parameters
are encoded as <code>YYYY-MM-DD'T'HH:mm:ss'Z'</code>. Result should be valid XML
of form:</p>
+			  <source>
+&lt;seeds&gt;
+   &lt;seed id="document_id_1" /&gt;
+   &lt;seed id="document_id_2" /&gt;
+   ...
+&lt;/seeds&gt;
+			  </source>
+			  <p>Attributes <code>id</code> are required.</p>
+              <br/><br/>
+
+			  <p><b>[entrypoint]?action=items&amp;id[]=document_id_1&amp;id[]=document_id_2</b></p>
+			  <p>Parameters:</p>
+			  <ul>
+				<li>id[] - array of document IDs that should be returned</li>
+			  </ul>
+			  <p>Result should be valid XML of form:</p>
+			  <source>
+&lt;items&gt;
+   &lt;item id="document_id_1"&gt;
+      &lt;url&gt;[http://document_uri]&lt;/url&gt;
+      &lt;version&gt;[document_version]&lt;/version&gt;
+	  &lt;created&gt;2013-11-11T21:00:00Z&lt;/created&gt;
+	  &lt;updated&gt;2013-11-11T21:00:00Z&lt;/updated&gt;
+	  &lt;filename&gt;filename.ext&lt;/filename&gt;
+	  &lt;mimetype&gt;mime/type&lt;/mimetype&gt;
+	  &lt;metadata&gt;
+	     &lt;meta name="meta_name_1"&gt;meta_value_1&lt;/meta&gt;
+	     &lt;meta name="meta_name_2"&gt;meta_value_2&lt;/meta&gt;
+		 ...
+	  &lt;/metadata&gt;
+	  &lt;auth&gt;
+		 &lt;token&gt;auth_token_1&lt;/token&gt;
+		 &lt;token&gt;auth_token_2&lt;/token&gt;
+		 ...
+	  &lt;/auth&gt;
+	  &lt;related&gt;
+		 &lt;id&gt;other_document_id_1&lt;/id&gt;
+		 &lt;id&gt;other_document_id_2&lt;/id&gt;
+		 ...
+	  &lt;/related&gt;
+	  &lt;content&gt;Document content&lt;/content&gt;
+   &lt;/item&gt;
+   ...
+&lt;/items&gt;
+			  </source>
+			  <p><code>id, url, version</code> are required, the rest is optional.</p>
+ 			  <p>If <code>auth</code> tag is provided - document will be treated
as non-public with defined access tokens; if it is omitted - document will be public.</p>
+			  <p>If <code>content</code> tag is omitted - connector will ask for
document content as <code>action=item</code> separate API call.</p>
+			  <p>You may provide related document ids when document repository is a graph or
tree. Provided documents will also be indexed. In case you want to use relations -
+			  seeding does not have to return all documents, only starting points. The rest of the documents
will be fetched using relations.</p>
+              <br/><br/>
+
+			  <p><b>[entrypoint]?action=item&amp;id=document_id</b></p>
+			  <p>Parameters:</p>
+			  <ul>
+				<li>id - requested document ID</li>
+			  </ul>
+			  <p>Result should be the document content. It does not have to be XML - you may
return binary data (PDF, DOC, etc) which represent the document.</p>
+              <br/><br/>
+			  <p>You may provide custom parameters by defining them in Job specification. All
defined parameters will be sent as additional GET parameters with every API call.</p>
+			  <p>You may override provided auth tokens and define forced tokens in Job specification.
If you change security model to "forced" and do not provide any tokens - all documents will
be public.</p>
+              <br/><br/>
+            </section>
         </section>
 
 </body>



Mime
View raw message