Return-Path: X-Original-To: apmail-manifoldcf-commits-archive@www.apache.org Delivered-To: apmail-manifoldcf-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id ECD8E11ED9 for ; Wed, 18 Jun 2014 18:21:22 +0000 (UTC) Received: (qmail 42268 invoked by uid 500); 18 Jun 2014 18:21:22 -0000 Delivered-To: apmail-manifoldcf-commits-archive@manifoldcf.apache.org Received: (qmail 42219 invoked by uid 500); 18 Jun 2014 18:21:22 -0000 Mailing-List: contact commits-help@manifoldcf.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@manifoldcf.apache.org Delivered-To: mailing list commits@manifoldcf.apache.org Received: (qmail 42210 invoked by uid 99); 18 Jun 2014 18:21:22 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 18 Jun 2014 18:21:22 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 18 Jun 2014 18:21:23 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 2843123889D5; Wed, 18 Jun 2014 18:20:58 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1603561 - /manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java Date: Wed, 18 Jun 2014 18:20:58 -0000 To: commits@manifoldcf.apache.org From: kwright@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140618182058.2843123889D5@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kwright Date: Wed Jun 18 18:20:57 2014 New Revision: 1603561 URL: http://svn.apache.org/r1603561 Log: Add TikaExtractor class (not done yet) Added: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java (with props) Added: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java?rev=1603561&view=auto ============================================================================== --- manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java (added) +++ manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java Wed Jun 18 18:20:57 2014 @@ -0,0 +1,109 @@ +/* $Id$ */ + +/** +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.manifoldcf.agents.transformation.tika; + +import org.apache.manifoldcf.core.interfaces.*; +import org.apache.manifoldcf.agents.interfaces.*; + +import java.io.*; +import java.util.*; + +/** This connector works as a transformation connector, but does nothing other than logging. +* +*/ +public class TikaExtractor extends org.apache.manifoldcf.agents.transformation.BaseTransformationConnector +{ + public static final String _rcsid = "@(#)$Id$"; + + protected static final String ACTIVITY_EXTRACT = "extract"; + + protected static final String[] activitiesList = new String[]{ACTIVITY_EXTRACT}; + + /** Return a list of activities that this connector generates. + * The connector does NOT need to be connected before this method is called. + *@return the set of activities. + */ + @Override + public String[] getActivitiesList() + { + return activitiesList; + } + + /** Add (or replace) a document in the output data store using the connector. + * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be + * necessary. + * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the + * output description, since that was what was partly used to determine if output should be taking place. So it may be necessary for this method to decode + * an output description string in order to determine what should be done. + *@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process + * and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors. + *@param outputDescription is the description string that was constructed for this document by the getOutputDescription() method. + *@param document is the document data to be processed (handed to the output data store). + *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document. May be null. + *@param activities is the handle to an object that the implementer of a pipeline connector may use to perform operations, such as logging processing activity, + * or sending a modified document to the next stage in the pipeline. + *@return the document status (accepted or permanently rejected). + *@throws IOException only if there's a stream error reading the document data. + */ + @Override + public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities) + throws ManifoldCFException, ServiceInterruption, IOException + { + long startTime = System.currentTimeMillis(); + String resultCode = "OK"; + String description = null; + Long length = null; + try + { + // MHL to actually hook up tika + long binaryLength = document.getBinaryLength(); + int rval = activities.sendDocument(documentURI,document,authorityNameString); + length = new Long(binaryLength); + resultCode = (rval == DOCUMENTSTATUS_ACCEPTED)?"ACCEPTED":"REJECTED"; + return rval; + } + catch (ServiceInterruption e) + { + resultCode = "SERVICEINTERRUPTION"; + description = e.getMessage(); + throw e; + } + catch (ManifoldCFException e) + { + resultCode = "EXCEPTION"; + description = e.getMessage(); + throw e; + } + catch (IOException e) + { + resultCode = "IOEXCEPTION"; + description = e.getMessage(); + throw e; + } + finally + { + activities.recordActivity(new Long(startTime), ACTIVITY_EXTRACT, length, documentURI, + resultCode, description); + } + + } + +} + + Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: manifoldcf/branches/CONNECTORS-954/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java ------------------------------------------------------------------------------ svn:keywords = Id