manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1603561 - /manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
Date Wed, 18 Jun 2014 18:20:58 GMT
Author: kwright
Date: Wed Jun 18 18:20:57 2014
New Revision: 1603561

URL: http://svn.apache.org/r1603561
Log:
Add TikaExtractor class (not done yet)

Added:
    manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
  (with props)

Added: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java?rev=1603561&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
(added)
+++ manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
Wed Jun 18 18:20:57 2014
@@ -0,0 +1,109 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.transformation.tika;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+
+import java.io.*;
+import java.util.*;
+
+/** Transformation connector that is intended to extract document content using Tika.
+* The Tika integration is not hooked up yet: for now each document is forwarded unchanged
+* to the next pipeline stage, and the attempt is recorded as an "extract" activity.
+*/
+public class TikaExtractor extends org.apache.manifoldcf.agents.transformation.BaseTransformationConnector
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  /** Name of the activity recorded for every document passed through this connector. */
+  protected static final String ACTIVITY_EXTRACT = "extract";
+
+  /** The complete set of activities this connector records. */
+  protected static final String[] activitiesList = new String[]{ACTIVITY_EXTRACT};
+
+  /** Return a list of activities that this connector generates.
+  * The connector does NOT need to be connected before this method is called.
+  *@return the set of activities.
+  */
+  @Override
+  public String[] getActivitiesList()
+  {
+    return activitiesList;
+  }
+
+  /** Process a document and hand it to the next stage in the pipeline.
+  * Tika extraction is not implemented yet, so the document is currently sent on unmodified.
+  * Every call records exactly one ACTIVITY_EXTRACT activity, whether the call succeeds or throws.
+  *@param documentURI is the URI of the document.  It is passed through to the next stage and
+  * used as the entity identifier of the recorded activity.
+  *@param pipelineDescription is the description string for this pipeline stage.  It is not
+  * consulted by the current implementation.
+  *@param document is the document data to be processed.
+  *@param authorityNameString is the name of the authority responsible for authorizing any
+  * access tokens passed in with the repository document.  May be null.
+  *@param activities is the handle to an object that the implementer of a pipeline connector
+  * may use to perform operations, such as logging processing activity,
+  * or sending a modified document to the next stage in the pipeline.
+  *@return the document status as returned by the next pipeline stage.
+  *@throws ManifoldCFException if sending the document fails; rethrown after being recorded.
+  *@throws ServiceInterruption if sending the document is interrupted; rethrown after being recorded.
+  *@throws IOException only if there's a stream error reading the document data.
+  */
+  @Override
+  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription,
+    RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+    throws ManifoldCFException, ServiceInterruption, IOException
+  {
+    long startTime = System.currentTimeMillis();
+    // NOTE(review): an unchecked exception bypasses all three catch clauses below, so the
+    // activity would be recorded with this initial "OK" code -- confirm that is acceptable.
+    String resultCode = "OK";
+    String description = null;
+    // Stays null unless the document was sent successfully, so failed attempts are
+    // recorded without a length.
+    Long length = null;
+    try
+    {
+      // MHL to actually hook up tika
+      long binaryLength = document.getBinaryLength();
+      int rval = activities.sendDocument(documentURI,document,authorityNameString);
+      length = Long.valueOf(binaryLength);
+      resultCode = (rval == DOCUMENTSTATUS_ACCEPTED)?"ACCEPTED":"REJECTED";
+      return rval;
+    }
+    catch (ServiceInterruption e)
+    {
+      resultCode = "SERVICEINTERRUPTION";
+      description = e.getMessage();
+      throw e;
+    }
+    catch (ManifoldCFException e)
+    {
+      resultCode = "EXCEPTION";
+      description = e.getMessage();
+      throw e;
+    }
+    catch (IOException e)
+    {
+      resultCode = "IOEXCEPTION";
+      description = e.getMessage();
+      throw e;
+    }
+    finally
+    {
+      // Runs on every exit path, so each call is logged exactly once.
+      activities.recordActivity(Long.valueOf(startTime), ACTIVITY_EXTRACT, length, documentURI,
+        resultCode, description);
+    }
+
+  }
+
+}
+
+

Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
------------------------------------------------------------------------------
    svn:keywords = Id



Mime
View raw message