Return-Path: X-Original-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 80368E2BF for ; Tue, 20 Nov 2012 17:37:59 +0000 (UTC) Received: (qmail 56282 invoked by uid 500); 20 Nov 2012 17:37:59 -0000 Delivered-To: apmail-incubator-ctakes-commits-archive@incubator.apache.org Received: (qmail 56258 invoked by uid 500); 20 Nov 2012 17:37:59 -0000 Mailing-List: contact ctakes-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ctakes-dev@incubator.apache.org Delivered-To: mailing list ctakes-commits@incubator.apache.org Received: (qmail 56247 invoked by uid 99); 20 Nov 2012 17:37:59 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 20 Nov 2012 17:37:59 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 20 Nov 2012 17:37:57 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 9E48C2388A32; Tue, 20 Nov 2012 17:37:37 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1411756 - /incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java Date: Tue, 20 Nov 2012 17:37:37 -0000 To: ctakes-commits@incubator.apache.org From: mattcoarr@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20121120173737.9E48C2388A32@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: mattcoarr Date: Tue Nov 20 17:37:37 2012 New Revision: 1411756 URL: 
http://svn.apache.org/viewvc?rev=1411756&view=rev Log: an xmi writer that can be used in a cpe pipeline (this one uses filename from the ctakes common type system to name the output file, whereas the uima examples one uses the sample's filename annotation) Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java?rev=1411756&view=auto ============================================================================== --- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java (added) +++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java Tue Nov 20 17:37:37 2012 @@ -0,0 +1,214 @@ +package org.apache.ctakes.core.cc; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; + +import org.apache.uima.UIMAFramework; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.cas.impl.XmiCasSerializer; +import org.apache.uima.collection.CasConsumerDescription; +import org.apache.uima.collection.CasConsumer_ImplBase; +import org.apache.uima.examples.SourceDocumentInformation; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.resource.ResourceProcessException; +import org.apache.uima.util.InvalidXMLException; +import org.apache.uima.util.UriUtils; +import org.apache.uima.util.XMLInputSource; +import org.apache.uima.util.XMLSerializer; +import org.xml.sax.SAXException; + +import org.apache.ctakes.core.util.DocumentIDAnnotationUtil; +import org.apache.ctakes.typesystem.type.structured.DocumentID; + +/** + * A simple CAS consumer that writes the CAS to XMI format. + *

+ * <p>
+ * This CAS Consumer takes one parameter:
+ * <ul>
+ * <li><code>OutputDirectory</code> - path to directory into which output files will be written</li>
+ * </ul>
+ */ +public class XmiWriterCasConsumerCtakes extends CasConsumer_ImplBase { + /** + * Name of configuration parameter that must be set to the path of a directory into which the + * output files will be written. + */ + public static final String PARAM_OUTPUTDIR = "OutputDirectory"; + + /** Directory that receives the XMI files; assigned in initialize() from the PARAM_OUTPUTDIR setting. */ private File mOutputDir; + + /** Running counter used only for the fallback docN.xmi file name in processCas. */ private int mDocNum; + + /** + * Resets the document counter, reads the PARAM_OUTPUTDIR setting and creates that directory + * (with any missing parents) when it does not exist yet. + * NOTE(review): the boolean result of mkdirs() is ignored and the parameter value is not + * checked for null before it is passed to new File(String) -- confirm whether either case + * needs explicit handling. + * + * @throws ResourceInitializationException + * declared by the signature; no statement in this body throws it directly + */ public void initialize() throws ResourceInitializationException { + mDocNum = 0; + mOutputDir = new File((String) getConfigParameterValue(PARAM_OUTPUTDIR)); + if (!mOutputDir.exists()) { + mOutputDir.mkdirs(); + } + } + + /** + * Processes the CAS which was populated by the TextAnalysisEngines.
+ * In this case, the CAS is converted to XMI and written into the output file. + * The file name is the document ID returned by DocumentIDAnnotationUtil.getDocumentID with + * .xmi appended. An ID that contains a slash is quoted into a URI and only the last name of + * the resulting File path is kept. When the ID is null or empty, or quoting fails with a + * URISyntaxException, the file is named docN.xmi, where N is a counter that is incremented + * each time the fallback name is used. + * NOTE(review): new File(URI) throws IllegalArgumentException for URIs it does not accept + * (for example relative ones) and that exception is not caught here -- confirm that IDs + * containing a slash are always absolute file URIs. + * + * @param aCAS + * a CAS which has been populated by the TAEs + * + * @throws ResourceProcessException + * if the JCas cannot be obtained from the CAS, or if writing the XMI file fails with an + * IOException or SAXException + * + * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) + */ + public void processCas(CAS aCAS) throws ResourceProcessException { + String modelFileName = null; + + JCas jcas; + try { + jcas = aCAS.getJCas(); + } catch (CASException e) { + throw new ResourceProcessException(e); + } + + String originalFileName = DocumentIDAnnotationUtil.getDocumentID(jcas); + File outFile = null; + if (originalFileName != null && !originalFileName.isEmpty()) + { + File inFile; + try + { + String outFileName = null; + if (originalFileName.contains("/")) + { + URI uri = UriUtils.quote(originalFileName); + inFile = new File(uri); + outFileName = inFile.getName(); + } else + { + outFileName = originalFileName; + } + outFileName += ".xmi"; + outFile = new File(mOutputDir, outFileName); + + } catch (URISyntaxException e) + { + // bad URI, use default processing below + } + + } + if (outFile == null) { + outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xmi"); // Jira UIMA-629 + } + // serialize XMI and write to output file + try { + writeXmi(jcas.getCas(), outFile, modelFileName); + } catch (IOException e) { + throw new ResourceProcessException(e); + } catch (SAXException e) { + throw new ResourceProcessException(e); + } + } + + /** + * Serialize a CAS to a file in XMI format. + * NOTE(review): processCas calls the three-argument overload; this two-argument variant has + * no caller inside this class. + * + * @param aCas + * CAS to serialize + * @param name + * output file + * @throws IOException + * if the output file cannot be opened or closed + * @throws SAXException + * declared by the signature; presumably raised by the serialize call + */ + private void writeXmi(CAS aCas, File name) throws IOException, SAXException { + FileOutputStream out = null; + + try { + // write XMI + out = new FileOutputStream(name); + XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem()); + XMLSerializer xmlSer = 
new XMLSerializer(out, false); + ser.serialize(aCas, xmlSer.getContentHandler()); + } finally { + if (out != null) { + out.close(); + } + } + } + + /** + * Serialize a CAS to a file in XMI format. The stream is closed in a finally block, so it is + * released even when serialization fails. + * + * @param aCas + * CAS to serialize + * @param name + * output file + * @param modelFileName + * not read by this method; processCas always passes null + * @throws IOException + * if the output file cannot be opened or closed + * @throws SAXException + * declared by the signature; presumably raised by the serialize call + */ + private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException { + FileOutputStream out = null; + + try { + // write XMI + out = new FileOutputStream(name); + XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem()); + XMLSerializer xmlSer = new XMLSerializer(out, false); + ser.serialize(aCas, xmlSer.getContentHandler()); + } finally { + if (out != null) { + out.close(); + } + } + } + + /** + * Parses and returns the descriptor for this CAS consumer. The descriptor is read from the + * resource XmiWriterCasConsumerCtakes.xml, looked up relative to this class. + * NOTE(review): the stream is neither checked for null (resource missing) nor closed in this + * method -- confirm how the parser behaves in the missing-resource case. + * + * @return an object containing all of the information parsed from the descriptor. + * + * @throws InvalidXMLException + * if the descriptor is invalid + */ + public static CasConsumerDescription getDescription() throws InvalidXMLException { + InputStream descStream = XmiWriterCasConsumerCtakes.class + .getResourceAsStream("XmiWriterCasConsumerCtakes.xml"); + return UIMAFramework.getXMLParser().parseCasConsumerDescription( + new XMLInputSource(descStream, null)); + } + + /** Returns the URL of the XmiWriterCasConsumerCtakes.xml resource looked up relative to this class, as given by Class.getResource. */ public static URL getDescriptorURL() { + return XmiWriterCasConsumerCtakes.class.getResource("XmiWriterCasConsumerCtakes.xml"); + } +}