incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mattco...@apache.org
Subject svn commit: r1411756 - /incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java
Date Tue, 20 Nov 2012 17:37:37 GMT
Author: mattcoarr
Date: Tue Nov 20 17:37:37 2012
New Revision: 1411756

URL: http://svn.apache.org/viewvc?rev=1411756&view=rev
Log:
an xmi writer that can be used in a cpe pipeline (this one uses filename from the ctakes common
type system to name the output file, whereas the uima examples one uses the sample's filename
annotation)

Added:
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java

Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java?rev=1411756&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java
(added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/XmiWriterCasConsumerCtakes.java
Tue Nov 20 17:37:37 2012
@@ -0,0 +1,214 @@
+package org.apache.ctakes.core.cc;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.collection.CasConsumerDescription;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.examples.SourceDocumentInformation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.UriUtils;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLSerializer;
+import org.xml.sax.SAXException;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+
+/**
+ * A simple CAS consumer that writes the CAS to XMI format.
+ * <p>
+ * This CAS Consumer takes one parameter:
+ * <ul>
+ * <li><code>OutputDirectory</code> - path to directory into which output
files will be written</li>
+ * </ul>
+ */
+public class XmiWriterCasConsumerCtakes extends CasConsumer_ImplBase {
+  /**
+   * Name of configuration parameter that must be set to the path of a directory into which
the
+   * output files will be written.
+   */
+  public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+  private File mOutputDir;
+
+  private int mDocNum;
+
+  public void initialize() throws ResourceInitializationException {
+    mDocNum = 0;
+    mOutputDir = new File((String) getConfigParameterValue(PARAM_OUTPUTDIR));
+    if (!mOutputDir.exists()) {
+      mOutputDir.mkdirs();
+    }
+  }
+
+  /**
+   * Processes the CAS which was populated by the TextAnalysisEngines. <br>
+   * In this case, the CAS is converted to XMI and written into the output file .
+   * 
+   * @param aCAS
+   *          a CAS which has been populated by the TAEs
+   * 
+   * @throws ResourceProcessException
+   *           if there is an error in processing the Resource
+   * 
+   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
+   */
+  public void processCas(CAS aCAS) throws ResourceProcessException {
+    String modelFileName = null;
+
+    JCas jcas;
+    try {
+      jcas = aCAS.getJCas();
+    } catch (CASException e) {
+      throw new ResourceProcessException(e);
+    }
+    
+    String originalFileName = DocumentIDAnnotationUtil.getDocumentID(jcas);
+    File outFile = null;
+    if (originalFileName != null && !originalFileName.isEmpty())
+    {
+      File inFile;
+      try
+      {
+    	String outFileName = null;
+    	if (originalFileName.contains("/"))
+    	{
+	        URI uri = UriUtils.quote(originalFileName);
+	        inFile = new File(uri);
+	        outFileName = inFile.getName();
+    	} else
+    	{
+    		outFileName = originalFileName;
+    	}
+        outFileName += ".xmi";
+        outFile = new File(mOutputDir, outFileName);
+        
+      } catch (URISyntaxException e)
+      {
+    	// bad URI, use default processing below
+      }
+      
+    }
+    if (outFile == null) {
+        outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xmi"); // Jira UIMA-629
+     }
+    // serialize XCAS and write to output file
+    try {
+      writeXmi(jcas.getCas(), outFile, modelFileName);
+    } catch (IOException e) {
+      throw new ResourceProcessException(e);
+    } catch (SAXException e) {
+      throw new ResourceProcessException(e);
+    }
+  }
+
+  /**
+   * Serialize a CAS to a file in XMI format
+   * 
+   * @param aCas
+   *          CAS to serialize
+   * @param name
+   *          output file
+   * @throws SAXException
+   * @throws Exception
+   * 
+   * @throws ResourceProcessException
+   */
+  private void writeXmi(CAS aCas, File name) throws IOException, SAXException {
+    FileOutputStream out = null;
+
+    try {
+      // write XMI
+      out = new FileOutputStream(name);
+      XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem());
+      XMLSerializer xmlSer = new XMLSerializer(out, false);
+      ser.serialize(aCas, xmlSer.getContentHandler());
+    } finally {
+      if (out != null) {
+        out.close();
+      }
+    }
+  }
+
+  /**
+   * Serialize a CAS to a file in XMI format
+   * 
+   * @param aCas
+   *          CAS to serialize
+   * @param name
+   *          output file
+   * @throws SAXException
+   * @throws Exception
+   * 
+   * @throws ResourceProcessException
+   */
+  private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException
{
+    FileOutputStream out = null;
+
+    try {
+      // write XMI
+      out = new FileOutputStream(name);
+      XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem());
+      XMLSerializer xmlSer = new XMLSerializer(out, false);
+      ser.serialize(aCas, xmlSer.getContentHandler());
+    } finally {
+      if (out != null) {
+        out.close();
+      }
+    }
+  }
+
+  /**
+   * Parses and returns the descriptor for this collection reader. The descriptor is stored
in the
+   * uima.jar file and located using the ClassLoader.
+   * 
+   * @return an object containing all of the information parsed from the descriptor.
+   * 
+   * @throws InvalidXMLException
+   *           if the descriptor is invalid or missing
+   */
+  public static CasConsumerDescription getDescription() throws InvalidXMLException {
+    InputStream descStream = XmiWriterCasConsumerCtakes.class
+            .getResourceAsStream("XmiWriterCasConsumerCtakes.xml");
+    return UIMAFramework.getXMLParser().parseCasConsumerDescription(
+            new XMLInputSource(descStream, null));
+  }
+  
+  public static URL getDescriptorURL() {
+    return XmiWriterCasConsumerCtakes.class.getResource("XmiWriterCasConsumerCtakes.xml");
+  }  
+}



Mime
View raw message