incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From james-mas...@apache.org
Subject svn commit: r1403989 [13/28] - in /incubator/ctakes/branches/SHARPn-cTAKES: Constituency Parser/src/org/chboston/cnlp/ctakes/parser/ Constituency Parser/src/org/chboston/cnlp/ctakes/parser/uima/ae/ Constituency Parser/src/org/chboston/cnlp/ctakes/parse...
Date Wed, 31 Oct 2012 05:26:55 GMT
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/CasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,105 +14,105 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStream;
-
-import org.apache.log4j.Logger;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.impl.XCASSerializer;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-
-/**
- * CasConsumer that writes a JCas (the current view) to an xml file
- * 
- * @author Mayo Clinic 
- */
-public class CasConsumer extends CasConsumer_ImplBase
-{
-    // LOG4J logger based on class name
-    private Logger iv_logger = Logger.getLogger(getClass().getName());
-
-    private String iv_outputDir = null;
-    
-    // iv_procCount is used to name the output files sequentially if there 
-    // is a problem with naming based on source names
-    private int iv_procCount = 0; 
-
-    
-    /**
-     * Read in configuration parameters
-     */
-    public void initialize() throws ResourceInitializationException {
-        iv_outputDir = (String) getConfigParameterValue("outputDir");
-    }
-
-
-    /**
-     * Write a formatted xml file containing data from the view.
-     * The file name will come from the DocumentID annotation,
-     * which is associated with a view.
-     * We append .xml to the DocumentID/filename 
-     */
-    private void processView(JCas view) throws Exception {
-        // String docText = view.getDocumentText();
-
-        String docName = DocumentIDAnnotationUtil.getDocumentID(view);
-
-        File outputFile;
-        if (docName==null) {
-        	docName = "doc" + iv_procCount + ".xml";
-        }
-        else {
-        	docName = docName + ".xml";        		
-			//	if (!docName.endsWith(".xml")) {
-			//    	docName = docName + ".xml";        		
-			//	}
-        }
-        
-        OutputStream out=null;
-        try {
-        	File outputDir = new File(iv_outputDir);
-        	outputDir.mkdirs();
-            outputFile = new File(iv_outputDir + File.separatorChar + docName);
-            out = new FileOutputStream(outputFile);
-            XCASSerializer.serialize(view.getCas(), out, true); // true -> formats the output
-        } 
-        finally {
-	        iv_procCount++;
-	        if (out != null) {
-	        	out.close();
-	        }
-        }
-
-    }
-
-    
-    /**
-     * Create an xml file from the data in the cas.
-     */
-    public void processCas(CAS cas) throws ResourceProcessException {
-
-    	iv_logger.info("Started");
-    	
-        try { 
-
-        	JCas currentView = cas.getCurrentView().getJCas();
-            processView(currentView);
-            
-        } catch (Exception e) {
-        	throw new ResourceProcessException(e);
-        }
-
-    }
-
-}
\ No newline at end of file
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+
+import org.apache.log4j.Logger;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XCASSerializer;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+
+/**
+ * CasConsumer that writes a JCas (the current view) to an xml file
+ * 
+ * @author Mayo Clinic 
+ */
+public class CasConsumer extends CasConsumer_ImplBase
+{
+    // LOG4J logger based on class name
+    private Logger iv_logger = Logger.getLogger(getClass().getName());
+
+    private String iv_outputDir = null;
+    
+    // iv_procCount is used to name the output files sequentially if there 
+    // is a problem with naming based on source names
+    private int iv_procCount = 0; 
+
+    
+    /**
+     * Read in configuration parameters
+     */
+    public void initialize() throws ResourceInitializationException {
+        iv_outputDir = (String) getConfigParameterValue("outputDir");
+    }
+
+
+    /**
+     * Write a formatted xml file containing data from the view.
+     * The file name will come from the DocumentID annotation,
+     * which is associated with a view.
+     * We append .xml to the DocumentID/filename 
+     */
+    private void processView(JCas view) throws Exception {
+        // String docText = view.getDocumentText();
+
+        String docName = DocumentIDAnnotationUtil.getDocumentID(view);
+
+        File outputFile;
+        if (docName==null) {
+        	docName = "doc" + iv_procCount + ".xml";
+        }
+        else {
+        	docName = docName + ".xml";        		
+			//	if (!docName.endsWith(".xml")) {
+			//    	docName = docName + ".xml";        		
+			//	}
+        }
+        
+        OutputStream out=null;
+        try {
+        	File outputDir = new File(iv_outputDir);
+        	outputDir.mkdirs();
+            outputFile = new File(iv_outputDir + File.separatorChar + docName);
+            out = new FileOutputStream(outputFile);
+            XCASSerializer.serialize(view.getCas(), out, true); // true -> formats the output
+        } 
+        finally {
+	        iv_procCount++;
+	        if (out != null) {
+	        	out.close();
+	        }
+        }
+
+    }
+
+    
+    /**
+     * Create an xml file from the data in the cas.
+     */
+    public void processCas(CAS cas) throws ResourceProcessException {
+
+    	iv_logger.info("Started");
+    	
+        try { 
+
+        	JCas currentView = cas.getCurrentView().getJCas();
+            processView(currentView);
+            
+        } catch (Exception e) {
+        	throw new ResourceProcessException(e);
+        }
+
+    }
+
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/FilesInDirectoryCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,95 +14,95 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-/**
- * For each CAS a local file with the document text is written to a directory specifed by a parameter.  
- * This CAS consumer does not make use of any annotation information in the cas except for the document 
- * id specified the CommonTypeSystem.xml descriptor.  The document id will be the name of the file written 
- * for each CAS.  
- * 
- * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS 
- * initializer to the local file system.  For example, a JDBC Collection Reader may read XML documents 
- * from a database and a specialized cas initializer may convert the XML to plain text.  The 
- * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
- */
-
-public class FilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
-
-	public static final String PARAM_OUTPUTDIR = "OutputDirectory";
-
-	File iv_outputDirectory;
-	
-	public void initialize() throws ResourceInitializationException 
-	{
-	    String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
-	    iv_outputDirectory = new File(outputDirectoryName);
-	    if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
-	    	throw new ResourceInitializationException(
-	    			new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
-	}
-	
-	public void processCas(CAS cas) throws ResourceProcessException 
-	{
-		try 
-		{
-			JCas jcas;
-			jcas = cas.getJCas();
-			//	jcas = cas.getJCas().getView("_InitialView");
-			//	jcas = cas.getJCas().getView("plaintext");
-		
-			String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-			String documentText = jcas.getDocumentText();
-
-			if (documentID==null) {
-
-				jcas = cas.getJCas().getView("_InitialView");
-				documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-
-				if (documentID==null) {
-				
-					jcas = cas.getJCas().getView("plaintext");
-					documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-					
-					if (documentID==null) {
-						documentID = "doc_"+new java.util.Date().getTime()+".xml"; // use timestamp in name: doc_TIMESTAMP.xml 
-						System.err.println("Unable to find DocumentIDAnnotation, using " + documentID);
-					}
-				}
-				
-			}
-
-			writeToFile(documentID, documentText);
-			
-		}
-		catch(Exception e)
-		{
-			throw new ResourceProcessException(e);
-		}
-	}
-	
-	private void writeToFile(String documentID, String documentText) throws IOException
-	{
-		File outputFile = new File(iv_outputDirectory, documentID);
-		outputFile.createNewFile();
-		OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
-		out.write(documentText.getBytes());
-		out.flush();
-		out.close();
-	}
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * For each CAS a local file with the document text is written to a directory specifed by a parameter.  
+ * This CAS consumer does not make use of any annotation information in the cas except for the document 
+ * id specified the CommonTypeSystem.xml descriptor.  The document id will be the name of the file written 
+ * for each CAS.  
+ * 
+ * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS 
+ * initializer to the local file system.  For example, a JDBC Collection Reader may read XML documents 
+ * from a database and a specialized cas initializer may convert the XML to plain text.  The 
+ * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
+ */
+
+public class FilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
+
+	public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+	File iv_outputDirectory;
+	
+	public void initialize() throws ResourceInitializationException 
+	{
+	    String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
+	    iv_outputDirectory = new File(outputDirectoryName);
+	    if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
+	    	throw new ResourceInitializationException(
+	    			new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
+	}
+	
+	public void processCas(CAS cas) throws ResourceProcessException 
+	{
+		try 
+		{
+			JCas jcas;
+			jcas = cas.getJCas();
+			//	jcas = cas.getJCas().getView("_InitialView");
+			//	jcas = cas.getJCas().getView("plaintext");
+		
+			String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+			String documentText = jcas.getDocumentText();
+
+			if (documentID==null) {
+
+				jcas = cas.getJCas().getView("_InitialView");
+				documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+
+				if (documentID==null) {
+				
+					jcas = cas.getJCas().getView("plaintext");
+					documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+					
+					if (documentID==null) {
+						documentID = "doc_"+new java.util.Date().getTime()+".xml"; // use timestamp in name: doc_TIMESTAMP.xml 
+						System.err.println("Unable to find DocumentIDAnnotation, using " + documentID);
+					}
+				}
+				
+			}
+
+			writeToFile(documentID, documentText);
+			
+		}
+		catch(Exception e)
+		{
+			throw new ResourceProcessException(e);
+		}
+	}
+	
+	private void writeToFile(String documentID, String documentText) throws IOException
+	{
+		File outputFile = new File(iv_outputDirectory, documentID);
+		outputFile.createNewFile();
+		OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
+		out.write(documentText.getBytes());
+		out.flush();
+		out.close();
+	}
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/HtmlTableCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,397 +14,397 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.StringTokenizer;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.util.JCasUtil;
-
-/**
- * Outputs an HTML table that visualizes the specified Annotation objects over
- * the document text.
- * 
- * @author Mayo Clinic
- * 
- */
-public class HtmlTableCasConsumer extends CasConsumer_ImplBase
-{
-    private File iv_outputDir;
-
-    private int iv_tableSpanType;
-
-    private int[] iv_nestedAnnTypeArr;
-
-    // key = annotation type (java.lang.Integer)
-    // val = getter method (java.lang.reflect.Method)
-    private Map iv_getterMethMap = new HashMap();
-
-    private int iv_count;
-
-    private String[] iv_tdStyleArr = { "I", "B" };
-
-    public void initialize() throws ResourceInitializationException
-    {
-        try
-        {
-            iv_outputDir = new File(
-                    (String) getConfigParameterValue("OutputDir"));
-
-            String classname;
-            classname = (String) getConfigParameterValue("TableSpanAnnotation");
-            iv_tableSpanType = JCasUtil.getType(classname);
-
-            String[] strArr = (String[]) getConfigParameterValue("NestedAnnotations");
-            iv_nestedAnnTypeArr = new int[strArr.length];
-            for (int i = 0; i < strArr.length; i++)
-            {
-                StringTokenizer st = new StringTokenizer(strArr[i], "|");
-                classname = st.nextToken().trim();
-                iv_nestedAnnTypeArr[i] = JCasUtil.getType(classname);
-
-                // if there's an extra token, it must be a getter methodname
-                if (st.countTokens() == 1)
-                {
-                    String methName = st.nextToken().trim();
-                    Class c = Class.forName(classname);
-                    Method meth = c.getMethod(methName, (Class[]) null);
-                    iv_getterMethMap.put(new Integer(iv_nestedAnnTypeArr[i]),
-                            meth);
-                }
-            }
-
-        } catch (Exception e)
-        {
-            throw new ResourceInitializationException(e);
-        }
-    }
-
-    public void processCas(CAS cas) throws ResourceProcessException
-    {
-        try
-        {
-            JCas jcas = cas.getJCas();
-            StringBuffer htmlSB = new StringBuffer();
-            htmlSB.append("<HTML>");
-            htmlSB.append("<TITLE>?</TITLE>");
-            htmlSB.append("<BODY>");
-
-            Iterator tSpanItr = jcas.getJFSIndexRepository()
-                    .getAnnotationIndex(iv_tableSpanType).iterator();
-            while (tSpanItr.hasNext())
-            {
-                Annotation tSpanAnn = (Annotation) tSpanItr.next();
-                String tSpanText = tSpanAnn.getCoveredText();
-
-                htmlSB.append("<TABLE border=1>");
-                htmlSB.append("<TR bordercolor=\"white\">");
-                for (int i = 0; i < tSpanText.length(); i++)
-                {
-                    htmlSB.append("<TD width=10>");
-                    htmlSB.append(tSpanText.charAt(i));
-                    htmlSB.append("</TD>");
-                }
-                htmlSB.append("</TR>");
-
-                int tdStyleIdx = 0;
-                for (int nestIdx = 0; nestIdx < iv_nestedAnnTypeArr.length; nestIdx++)
-                {
-                    List nestedAnnList = getAnnotations(jcas,
-                            iv_nestedAnnTypeArr[nestIdx], tSpanAnn.getBegin(),
-                            tSpanAnn.getEnd());
-
-                    // sort nested annotation list
-                    Collections.sort(nestedAnnList,
-                            new AnnotationLengthComparator());
-
-                    List annotsAtRowList = arrangeIntoRows(tSpanAnn,
-                            nestedAnnList);
-
-                    Iterator trAnnItr = annotsAtRowList.iterator();
-                    while (trAnnItr.hasNext())
-                    {
-                        htmlSB.append("<TR>");
-                        int cursor = tSpanAnn.getBegin();
-                        List annList = (List) trAnnItr.next();
-
-                        // sort annotations in this row by offset position
-                        Collections.sort(annList,
-                                new AnnotationPositionComparator());
-
-                        Iterator annItr = annList.iterator();
-                        while (annItr.hasNext())
-                        {
-                            Annotation ann = (Annotation) annItr.next();
-                            // account for preceeding whitespace
-                            int delta = ann.getBegin() - cursor;
-                            if (delta > 0)
-                            {
-                                htmlSB.append("<TD width=10 colspan=" + delta
-                                        + ">");
-                                String whitespaceStr = "";
-                                for (int i = 0; i < delta; i++)
-                                {
-                                    whitespaceStr += ' ';
-                                }
-                                htmlSB.append(whitespaceStr);
-                                htmlSB.append("</TD>");
-                            }
-                            cursor = ann.getEnd();
-
-                            htmlSB
-                                    .append("<TD width=10 align=\"center\" colspan="
-                                            + ann.getCoveredText().length()
-                                            + ">");
-                            htmlSB.append("<");
-                            htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
-                            htmlSB.append(">");
-                            htmlSB.append(getDisplayValue(
-                                    iv_nestedAnnTypeArr[nestIdx], ann));
-                            htmlSB.append("</");
-                            htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
-                            htmlSB.append(">");
-                            htmlSB.append("</TD>");
-                        }
-                        htmlSB.append("</TR>");
-                    }
-
-                    tdStyleIdx++;
-                    if (tdStyleIdx == iv_tdStyleArr.length)
-                    {
-                        tdStyleIdx = 0;
-                    }
-                }
-                htmlSB.append("</BR>");
-                htmlSB.append("</BR>");
-
-                htmlSB.append("</TABLE>");
-            }
-
-            htmlSB.append("</BODY>");
-            htmlSB.append("</HTML>");
-
-            File f = new File(iv_outputDir.getAbsolutePath() + File.separator
-                    + "doc" + iv_count + ".html");
-            f.createNewFile();
-            BufferedWriter bw = new BufferedWriter(new FileWriter(f));
-            bw.write(htmlSB.toString());
-            bw.close();
-
-        } catch (Exception e)
-        {
-            throw new ResourceProcessException(e);
-        }
-        iv_count++;
-    }
-
-    /**
-     * Gets a value to be displayed in table cell for the given annotation
-     * object.
-     * 
-     * @param annType
-     * @param ann
-     * @return
-     */
-    private String getDisplayValue(int annType, Annotation ann)
-            throws IllegalAccessException, InvocationTargetException
-    {
-        Integer key = new Integer(annType);
-        if (iv_getterMethMap.containsKey(key))
-        {
-            Method meth = (Method) iv_getterMethMap.get(key);
-            Object val = meth.invoke(ann, (Object[]) null);
-            if (val != null)
-            {
-                return String.valueOf(val);
-            } else
-            {
-                // otherwise return empty string
-                return "";
-            }
-        } else
-        {
-            String typeName = ann.getType().getShortName();
-            return typeName.substring(0, typeName.indexOf("Annotation"));
-        }
-    }
-
-    /**
-     * Arranges the list of annotations into one or more rows. Each element of
-     * the return List represents a row. Each row is represented as a row of
-     * Annotation objects that below to that row.
-     * 
-     * @param tSpanAnn
-     * @param nestedAnnList
-     * @return
-     */
-    private List arrangeIntoRows(Annotation tSpanAnn, List nestedAnnList)
-    {
-        int tSpanSize = tSpanAnn.getCoveredText().length();
-        List maskAtRowList = new ArrayList();
-        maskAtRowList.add(new BitSet(tSpanSize));
-
-        List annotsAtRowList = new ArrayList();
-
-        // divide parse annotations into rows
-        while (nestedAnnList.size() != 0)
-        {
-            // pop annotation off
-            Annotation ann = (Annotation) nestedAnnList.remove(0);
-
-            BitSet annBitSet = new BitSet(tSpanSize);
-            annBitSet.set(ann.getBegin() - tSpanAnn.getBegin(), ann.getEnd()
-                    - tSpanAnn.getBegin());
-
-            // figure out which TR to place it in
-            int idx = 0;
-            boolean rowFound = false;
-            while (!rowFound)
-            {
-                BitSet trBitSet = (BitSet) maskAtRowList.get(idx);
-
-                // interset BitSets to determine if annotation will fit
-                // in this row
-                while (trBitSet.intersects(annBitSet))
-                {
-                    idx++;
-                    if ((idx + 1) > maskAtRowList.size())
-                    {
-                        trBitSet = new BitSet(tSpanSize);
-                        maskAtRowList.add(trBitSet);
-                    } else
-                    {
-                        trBitSet = (BitSet) maskAtRowList.get(idx);
-                    }
-                }
-                trBitSet.or(annBitSet);
-                rowFound = true;
-            }
-
-            List annList = null;
-            if ((idx + 1) > annotsAtRowList.size())
-            {
-                annList = new ArrayList();
-                annList.add(ann);
-                annotsAtRowList.add(annList);
-            } else
-            {
-                annList = (List) annotsAtRowList.get(idx);
-                annList.add(ann);
-            }
-        }
-        return annotsAtRowList;
-    }
-
-    /**
-     * Comparator for comparing two Annotation objects based on span length.
-     * 
-     * @author Mayo Clinic
-     * 
-     */
-    class AnnotationLengthComparator implements Comparator
-    {
-        /*
-         * (non-Javadoc)
-         * 
-         * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
-         */
-        public int compare(Object o1, Object o2)
-        {
-            Annotation a1 = (Annotation) o1;
-            Annotation a2 = (Annotation) o2;
-
-            Integer len1 = new Integer(a1.getCoveredText().length());
-            Integer len2 = new Integer(a2.getCoveredText().length());
-
-            if (len1.equals(len2))
-            {
-                if (a1.getBegin() < a2.getBegin())
-                    return -1;
-                else if (a1.getBegin() > a2.getBegin())
-                    return 1;
-                else
-                {
-                    if (a1.getEnd() < a2.getEnd())
-                        return 1;
-                    else if (a1.getEnd() > a2.getEnd())
-                        return -1;
-                    else
-                        return 0;
-                }
-            } else
-            {
-                return len1.compareTo(len2);
-            }
-        }
-    }
-
-    /**
-     * Comparator for comparing two Annotation objects based on offset position.
-     * 
-     * @author Mayo Clinic
-     * 
-     */
-    class AnnotationPositionComparator implements Comparator
-    {
-        /*
-         * (non-Javadoc)
-         * 
-         * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
-         */
-        public int compare(Object o1, Object o2)
-        {
-            Annotation a1 = (Annotation) o1;
-            Annotation a2 = (Annotation) o2;
-
-            if (a1.getBegin() < a2.getBegin())
-                return -1;
-            else if (a1.getBegin() > a2.getBegin())
-                return 1;
-            else
-            {
-                if (a1.getEnd() < a2.getEnd())
-                    return 1;
-                else if (a1.getEnd() > a2.getEnd())
-                    return -1;
-                else
-                    return 0;
-            }
-        }
-    }
-
-    private List getAnnotations(JCas jcas, int annType, int begin, int end)
-    {
-        List list = new ArrayList();
-        Iterator itr = jcas.getJFSIndexRepository().getAnnotationIndex(annType)
-                .iterator();
-        while (itr.hasNext())
-        {
-            Annotation ann = (Annotation) itr.next();
-            if ((ann.getBegin() >= begin) && (ann.getEnd() <= end))
-            {
-                list.add(ann);
-            }
-        }
-        return list;
-    }
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.util.JCasUtil;
+
+/**
+ * Outputs an HTML table that visualizes the specified Annotation objects over
+ * the document text.
+ * 
+ * @author Mayo Clinic
+ * 
+ */
+public class HtmlTableCasConsumer extends CasConsumer_ImplBase
+{
+    private File iv_outputDir;
+
+    private int iv_tableSpanType;
+
+    private int[] iv_nestedAnnTypeArr;
+
+    // key = annotation type (java.lang.Integer)
+    // val = getter method (java.lang.reflect.Method)
+    private Map iv_getterMethMap = new HashMap();
+
+    private int iv_count;
+
+    private String[] iv_tdStyleArr = { "I", "B" };
+
+    public void initialize() throws ResourceInitializationException
+    {
+        try
+        {
+            iv_outputDir = new File(
+                    (String) getConfigParameterValue("OutputDir"));
+
+            String classname;
+            classname = (String) getConfigParameterValue("TableSpanAnnotation");
+            iv_tableSpanType = JCasUtil.getType(classname);
+
+            String[] strArr = (String[]) getConfigParameterValue("NestedAnnotations");
+            iv_nestedAnnTypeArr = new int[strArr.length];
+            for (int i = 0; i < strArr.length; i++)
+            {
+                StringTokenizer st = new StringTokenizer(strArr[i], "|");
+                classname = st.nextToken().trim();
+                iv_nestedAnnTypeArr[i] = JCasUtil.getType(classname);
+
+                // if there's an extra token, it must be a getter methodname
+                if (st.countTokens() == 1)
+                {
+                    String methName = st.nextToken().trim();
+                    Class c = Class.forName(classname);
+                    Method meth = c.getMethod(methName, (Class[]) null);
+                    iv_getterMethMap.put(new Integer(iv_nestedAnnTypeArr[i]),
+                            meth);
+                }
+            }
+
+        } catch (Exception e)
+        {
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    public void processCas(CAS cas) throws ResourceProcessException
+    {
+        try
+        {
+            JCas jcas = cas.getJCas();
+            StringBuffer htmlSB = new StringBuffer();
+            htmlSB.append("<HTML>");
+            htmlSB.append("<TITLE>?</TITLE>");
+            htmlSB.append("<BODY>");
+
+            Iterator tSpanItr = jcas.getJFSIndexRepository()
+                    .getAnnotationIndex(iv_tableSpanType).iterator();
+            while (tSpanItr.hasNext())
+            {
+                Annotation tSpanAnn = (Annotation) tSpanItr.next();
+                String tSpanText = tSpanAnn.getCoveredText();
+
+                htmlSB.append("<TABLE border=1>");
+                htmlSB.append("<TR bordercolor=\"white\">");
+                for (int i = 0; i < tSpanText.length(); i++)
+                {
+                    htmlSB.append("<TD width=10>");
+                    htmlSB.append(tSpanText.charAt(i));
+                    htmlSB.append("</TD>");
+                }
+                htmlSB.append("</TR>");
+
+                int tdStyleIdx = 0;
+                for (int nestIdx = 0; nestIdx < iv_nestedAnnTypeArr.length; nestIdx++)
+                {
+                    List nestedAnnList = getAnnotations(jcas,
+                            iv_nestedAnnTypeArr[nestIdx], tSpanAnn.getBegin(),
+                            tSpanAnn.getEnd());
+
+                    // sort nested annotation list
+                    Collections.sort(nestedAnnList,
+                            new AnnotationLengthComparator());
+
+                    List annotsAtRowList = arrangeIntoRows(tSpanAnn,
+                            nestedAnnList);
+
+                    Iterator trAnnItr = annotsAtRowList.iterator();
+                    while (trAnnItr.hasNext())
+                    {
+                        htmlSB.append("<TR>");
+                        int cursor = tSpanAnn.getBegin();
+                        List annList = (List) trAnnItr.next();
+
+                        // sort annotations in this row by offset position
+                        Collections.sort(annList,
+                                new AnnotationPositionComparator());
+
+                        Iterator annItr = annList.iterator();
+                        while (annItr.hasNext())
+                        {
+                            Annotation ann = (Annotation) annItr.next();
+                            // account for preceeding whitespace
+                            int delta = ann.getBegin() - cursor;
+                            if (delta > 0)
+                            {
+                                htmlSB.append("<TD width=10 colspan=" + delta
+                                        + ">");
+                                String whitespaceStr = "";
+                                for (int i = 0; i < delta; i++)
+                                {
+                                    whitespaceStr += ' ';
+                                }
+                                htmlSB.append(whitespaceStr);
+                                htmlSB.append("</TD>");
+                            }
+                            cursor = ann.getEnd();
+
+                            htmlSB
+                                    .append("<TD width=10 align=\"center\" colspan="
+                                            + ann.getCoveredText().length()
+                                            + ">");
+                            htmlSB.append("<");
+                            htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
+                            htmlSB.append(">");
+                            htmlSB.append(getDisplayValue(
+                                    iv_nestedAnnTypeArr[nestIdx], ann));
+                            htmlSB.append("</");
+                            htmlSB.append(iv_tdStyleArr[tdStyleIdx]);
+                            htmlSB.append(">");
+                            htmlSB.append("</TD>");
+                        }
+                        htmlSB.append("</TR>");
+                    }
+
+                    tdStyleIdx++;
+                    if (tdStyleIdx == iv_tdStyleArr.length)
+                    {
+                        tdStyleIdx = 0;
+                    }
+                }
+                htmlSB.append("</BR>");
+                htmlSB.append("</BR>");
+
+                htmlSB.append("</TABLE>");
+            }
+
+            htmlSB.append("</BODY>");
+            htmlSB.append("</HTML>");
+
+            File f = new File(iv_outputDir.getAbsolutePath() + File.separator
+                    + "doc" + iv_count + ".html");
+            f.createNewFile();
+            BufferedWriter bw = new BufferedWriter(new FileWriter(f));
+            bw.write(htmlSB.toString());
+            bw.close();
+
+        } catch (Exception e)
+        {
+            throw new ResourceProcessException(e);
+        }
+        iv_count++;
+    }
+
+    /**
+     * Gets a value to be displayed in table cell for the given annotation
+     * object.
+     * 
+     * @param annType
+     * @param ann
+     * @return
+     */
+    private String getDisplayValue(int annType, Annotation ann)
+            throws IllegalAccessException, InvocationTargetException
+    {
+        Integer key = new Integer(annType);
+        if (iv_getterMethMap.containsKey(key))
+        {
+            Method meth = (Method) iv_getterMethMap.get(key);
+            Object val = meth.invoke(ann, (Object[]) null);
+            if (val != null)
+            {
+                return String.valueOf(val);
+            } else
+            {
+                // otherwise return empty string
+                return "";
+            }
+        } else
+        {
+            String typeName = ann.getType().getShortName();
+            return typeName.substring(0, typeName.indexOf("Annotation"));
+        }
+    }
+
+    /**
+     * Arranges the list of annotations into one or more rows. Each element of
+     * the return List represents a row. Each row is represented as a row of
+     * Annotation objects that below to that row.
+     * 
+     * @param tSpanAnn
+     * @param nestedAnnList
+     * @return
+     */
+    private List arrangeIntoRows(Annotation tSpanAnn, List nestedAnnList)
+    {
+        int tSpanSize = tSpanAnn.getCoveredText().length();
+        List maskAtRowList = new ArrayList();
+        maskAtRowList.add(new BitSet(tSpanSize));
+
+        List annotsAtRowList = new ArrayList();
+
+        // divide parse annotations into rows
+        while (nestedAnnList.size() != 0)
+        {
+            // pop annotation off
+            Annotation ann = (Annotation) nestedAnnList.remove(0);
+
+            BitSet annBitSet = new BitSet(tSpanSize);
+            annBitSet.set(ann.getBegin() - tSpanAnn.getBegin(), ann.getEnd()
+                    - tSpanAnn.getBegin());
+
+            // figure out which TR to place it in
+            int idx = 0;
+            boolean rowFound = false;
+            while (!rowFound)
+            {
+                BitSet trBitSet = (BitSet) maskAtRowList.get(idx);
+
+                // interset BitSets to determine if annotation will fit
+                // in this row
+                while (trBitSet.intersects(annBitSet))
+                {
+                    idx++;
+                    if ((idx + 1) > maskAtRowList.size())
+                    {
+                        trBitSet = new BitSet(tSpanSize);
+                        maskAtRowList.add(trBitSet);
+                    } else
+                    {
+                        trBitSet = (BitSet) maskAtRowList.get(idx);
+                    }
+                }
+                trBitSet.or(annBitSet);
+                rowFound = true;
+            }
+
+            List annList = null;
+            if ((idx + 1) > annotsAtRowList.size())
+            {
+                annList = new ArrayList();
+                annList.add(ann);
+                annotsAtRowList.add(annList);
+            } else
+            {
+                annList = (List) annotsAtRowList.get(idx);
+                annList.add(ann);
+            }
+        }
+        return annotsAtRowList;
+    }
+
+    /**
+     * Comparator for comparing two Annotation objects based on span length.
+     * 
+     * @author Mayo Clinic
+     * 
+     */
+    class AnnotationLengthComparator implements Comparator
+    {
+        /*
+         * (non-Javadoc)
+         * 
+         * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+         */
+        public int compare(Object o1, Object o2)
+        {
+            Annotation a1 = (Annotation) o1;
+            Annotation a2 = (Annotation) o2;
+
+            Integer len1 = new Integer(a1.getCoveredText().length());
+            Integer len2 = new Integer(a2.getCoveredText().length());
+
+            if (len1.equals(len2))
+            {
+                if (a1.getBegin() < a2.getBegin())
+                    return -1;
+                else if (a1.getBegin() > a2.getBegin())
+                    return 1;
+                else
+                {
+                    if (a1.getEnd() < a2.getEnd())
+                        return 1;
+                    else if (a1.getEnd() > a2.getEnd())
+                        return -1;
+                    else
+                        return 0;
+                }
+            } else
+            {
+                return len1.compareTo(len2);
+            }
+        }
+    }
+
+    /**
+     * Comparator for comparing two Annotation objects based on offset position.
+     * 
+     * @author Mayo Clinic
+     * 
+     */
+    class AnnotationPositionComparator implements Comparator
+    {
+        /*
+         * (non-Javadoc)
+         * 
+         * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+         */
+        public int compare(Object o1, Object o2)
+        {
+            Annotation a1 = (Annotation) o1;
+            Annotation a2 = (Annotation) o2;
+
+            if (a1.getBegin() < a2.getBegin())
+                return -1;
+            else if (a1.getBegin() > a2.getBegin())
+                return 1;
+            else
+            {
+                if (a1.getEnd() < a2.getEnd())
+                    return 1;
+                else if (a1.getEnd() > a2.getEnd())
+                    return -1;
+                else
+                    return 0;
+            }
+        }
+    }
+
+    private List getAnnotations(JCas jcas, int annType, int begin, int end)
+    {
+        List list = new ArrayList();
+        Iterator itr = jcas.getJFSIndexRepository().getAnnotationIndex(annType)
+                .iterator();
+        while (itr.hasNext())
+        {
+            Annotation ann = (Annotation) itr.next();
+            if ((ann.getBegin() >= begin) && (ann.getEnd() <= end))
+            {
+                list.add(ann);
+            }
+        }
+        return list;
+    }
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NonTerminalConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,24 +14,24 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.uima.core.cc;
-
-import org.apache.uima.collection.CasConsumer;
-
-/**
- * Extends the CasConsumer interface to provide a method for getting
- * output in XML form.  A regular CasConsumer is "terminal" in the sense
- * that its output does not get returned to object that initiated the
- * CasConsumer.  This interface allows for "non-terminal" behavior so that
- * the output can be returned.
- *  
- * @author Mayo Clinic
- */
-public interface NonTerminalConsumer extends CasConsumer
-{
-    /**
-     * Gets the generated output from a CasConsumer in XML form.
-     * @return Output xml in String form.
-     */
-    public String getOutputXml();
-}
\ No newline at end of file
+package edu.mayo.bmi.uima.core.cc;
+
+import org.apache.uima.collection.CasConsumer;
+
+/**
+ * Extends the CasConsumer interface to provide a method for getting
+ * output in XML form.  A regular CasConsumer is "terminal" in the sense
+ * that its output does not get returned to object that initiated the
+ * CasConsumer.  This interface allows for "non-terminal" behavior so that
+ * the output can be returned.
+ *  
+ * @author Mayo Clinic
+ */
+public interface NonTerminalConsumer extends CasConsumer
+{
+    /**
+     * Gets the generated output from a CasConsumer in XML form.
+     * @return Output xml in String form.
+     */
+    public String getOutputXml();
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/NormalizedFilesInDirectoryCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,85 +14,85 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Iterator;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.type.syntax.WordToken;
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-/**
- * For each CAS a local file with the document text is written to a directory specifed by a parameter.  
- * This CAS consumer does not make use of any annotation information in the cas except for the document 
- * id specified the CommonTypeSystem.xml descriptor.  The document id will be the name of the file written 
- * for each CAS.  
- * 
- * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS 
- * initializer to the local file system.  For example, a JDBC Collection Reader may read XML documents 
- * from a database and a specialized cas initializer may convert the XML to plain text.  The 
- * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
- */
-
-public class NormalizedFilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
-
-	public static final String PARAM_OUTPUTDIR = "OutputDirectory";
-
-	File iv_outputDirectory;
-	
-	public void initialize() throws ResourceInitializationException 
-	{
-	    String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
-	    iv_outputDirectory = new File(outputDirectoryName);
-	    if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
-	    	throw new ResourceInitializationException(
-	    			new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
-	}
-	
-	public void processCas(CAS cas) throws ResourceProcessException 
-	{
-		try 
-		{
-			JCas jcas;
-			jcas = cas.getJCas();
-		
-			StringBuffer normalizedText = new StringBuffer();
-			
-			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-	        Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
-	        while (tokenItr.hasNext())
-	        {
-	        	WordToken token = (WordToken) tokenItr.next();
-	        	String tokenNormText = token.getCanonicalForm();
-	        	normalizedText.append(tokenNormText+" ");	        
-	        }	        	
-			String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-			writeToFile(documentID, normalizedText.toString());
-		}
-		catch(Exception e)
-		{
-			throw new ResourceProcessException(e);
-		}
-	}
-	
-	private void writeToFile(String documentID, String documentText) throws IOException
-	{
-		File outputFile = new File(iv_outputDirectory, documentID);
-		outputFile.createNewFile();
-		OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
-		out.write(documentText.getBytes());
-		out.flush();
-		out.close();
-	}
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.type.syntax.WordToken;
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * For each CAS a local file with the document text is written to a directory specifed by a parameter.  
+ * This CAS consumer does not make use of any annotation information in the cas except for the document 
+ * id specified the CommonTypeSystem.xml descriptor.  The document id will be the name of the file written 
+ * for each CAS.  
+ * 
+ * This CAS consumer may be useful if you want to write the results of a collection reader and/or CAS 
+ * initializer to the local file system.  For example, a JDBC Collection Reader may read XML documents 
+ * from a database and a specialized cas initializer may convert the XML to plain text.  The 
+ * FilesInDirectoryCasConsumer can now be used to write the plain text to local plain text files.
+ */
+
+public class NormalizedFilesInDirectoryCasConsumer extends CasConsumer_ImplBase {
+
+	public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+	File iv_outputDirectory;
+	
+	public void initialize() throws ResourceInitializationException 
+	{
+	    String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
+	    iv_outputDirectory = new File(outputDirectoryName);
+	    if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
+	    	throw new ResourceInitializationException(
+	    			new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
+	}
+	
+	public void processCas(CAS cas) throws ResourceProcessException 
+	{
+		try 
+		{
+			JCas jcas;
+			jcas = cas.getJCas();
+		
+			StringBuffer normalizedText = new StringBuffer();
+			
+			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
+	        Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
+	        while (tokenItr.hasNext())
+	        {
+	        	WordToken token = (WordToken) tokenItr.next();
+	        	String tokenNormText = token.getCanonicalForm();
+	        	normalizedText.append(tokenNormText+" ");	        
+	        }	        	
+			String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+			writeToFile(documentID, normalizedText.toString());
+		}
+		catch(Exception e)
+		{
+			throw new ResourceProcessException(e);
+		}
+	}
+	
+	private void writeToFile(String documentID, String documentText) throws IOException
+	{
+		File outputFile = new File(iv_outputDirectory, documentID);
+		outputFile.createNewFile();
+		OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
+		out.write(documentText.getBytes());
+		out.flush();
+		out.close();
+	}
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenFreqCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,146 +14,146 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.TreeSet;
-
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.ProcessTrace;
-
-import edu.mayo.bmi.uima.core.type.syntax.WordToken;
-
-/**
- * This class creates a file that contains the frequencies of the word tokens found in a set
- * in a text collection.  This cas consumer could potentially be used to create a frequency
- * file for any kind of annotation but only counts Token annotations at the moment.   
- * 
- *  @see edu.mayo.bmi.uima.core.type.syntax.WordToken.java
- */
-
-public class TokenFreqCasConsumer extends CasConsumer_ImplBase
-{ 
-	/**
-	 * The name of the parameter that is specifies the path of the output file in the
-	 * descriptor file.  The value is "TokenFreqFile" and should be set in the descriptor
-	 * file.
-	 */
-
-	public static final String PARAM_WORD_FREQ_FILE = "TokenFreqFile";
-	File wordFreqFile;
-	Map wordFreqs;
-
-/**
- * This method opens/creates the file specified by "TokenFreqFile" and initializes the 
- * data structure that will keep track of frequency counts.
- * @see org.apache.uima.collection.CasConsumer_ImplBase#initialize()
- */	
-	public void initialize() throws ResourceInitializationException 
-	{
-		try
-		{
-			String wordFreqFileName = (String) getConfigParameterValue(PARAM_WORD_FREQ_FILE);
-			wordFreqFile = new File(wordFreqFileName);
-			if(!wordFreqFile.exists())
-			{
-				wordFreqFile.createNewFile();
-			}
-		}
-		catch(Exception ioe)
-		{
-			throw new ResourceInitializationException(ioe);
-		}
-		wordFreqs = new HashMap();
-	}
-
-	/**
-	 * Iterates through all of the WordTokenAnnotation's, gets the covered text for each annotation
-	 * and increments the frequency count for that text.  
-	 * 
-	 * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
-	 */
-	public void processCas(CAS cas) throws ResourceProcessException 
-	{
-		try 
-		{
-			JCas jcas;
-			jcas = cas.getJCas();
-			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-	        Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
-	        while (tokenItr.hasNext())
-	        {
-	        	WordToken token = (WordToken) tokenItr.next();
-	        	String text = token.getCoveredText();
-	        	if(!wordFreqs.containsKey(text))
-	        	{
-	        		wordFreqs.put(text, new int[1]);
-	        	}
-	        	((int[])wordFreqs.get(text))[0]++;
-	        }	        	
-		}
-		catch(Exception exception)
-		{
-			throw new ResourceProcessException(exception);
-		}
-	}
-
-	/**
-	 * This method sorts the frequency counts and prints out the resulting frequencies in descending
-	 * order to the frequency file in 'word|count' format.
-	 */
-	public void collectionProcessComplete(ProcessTrace arg0) throws ResourceProcessException, IOException 
-	{
-		//sortedFreqs will contain objects of type Object[] of length 2.  The first object in the array
-		//will hold the token and the second the frequency.  We want to sort on the frequency first in 
-		//descending order and token in ascending order for those tokens with the same frequency. 
-		TreeSet sortedFreqs = new TreeSet(
-				new Comparator() {
-					public int compare(Object obj1, Object obj2)
-					{
-						Object[] tokenFreq1 = (Object[]) obj1;
-						Object[] tokenFreq2 = (Object[]) obj2;
-						Integer freq1 = (Integer)tokenFreq1[1];
-						Integer freq2 = (Integer)tokenFreq2[1];
-						if(!freq2.equals(freq1))
-							return freq2.compareTo(freq1);
-						String token1 = (String)tokenFreq1[0];
-						String token2 = (String)tokenFreq2[0];
-						return token1.compareTo(token2); 
-					}
-				});
-		
-		Iterator words = wordFreqs.keySet().iterator();
-		while(words.hasNext())
-		{
-			String word = (String) words.next();
-			int freq = ((int[])(wordFreqs.get(word)))[0];
-			sortedFreqs.add(new Object[] {word,new Integer(freq)});
-		}
-		
-		PrintStream out = new PrintStream(new FileOutputStream(wordFreqFile));
-		Iterator freqs = sortedFreqs.iterator(); 
-		while(freqs.hasNext())
-		{
-			Object[] tokenFreq = (Object[]) freqs.next();
-			String word = (String) tokenFreq[0];
-			int freq = ((Integer)tokenFreq[1]).intValue();
-			out.println(word+"|"+freq);
-		}
-		out.flush();
-		out.close();
-	}
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeSet;
+
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.ProcessTrace;
+
+import edu.mayo.bmi.uima.core.type.syntax.WordToken;
+
+/**
+ * This class creates a file that contains the frequencies of the word tokens found
+ * in a text collection.  This cas consumer could potentially be used to create a frequency
+ * file for any kind of annotation but only counts WordToken annotations at the moment.
+ * 
+ *  @see edu.mayo.bmi.uima.core.type.syntax.WordToken
+ */
+
+public class TokenFreqCasConsumer extends CasConsumer_ImplBase
+{ 
+	/**
+	 * The name of the parameter that specifies the path of the output file in the
+	 * descriptor file.  The value is "TokenFreqFile" and should be set in the descriptor
+	 * file.
+	 */
+
+	public static final String PARAM_WORD_FREQ_FILE = "TokenFreqFile";
+	File wordFreqFile;
+	Map wordFreqs;
+
+/**
+ * This method opens/creates the file specified by "TokenFreqFile" and initializes the 
+ * data structure that will keep track of frequency counts.
+ * @see org.apache.uima.collection.CasConsumer_ImplBase#initialize()
+ */	
+	public void initialize() throws ResourceInitializationException 
+	{
+		try
+		{
+			String wordFreqFileName = (String) getConfigParameterValue(PARAM_WORD_FREQ_FILE);
+			wordFreqFile = new File(wordFreqFileName);
+			if(!wordFreqFile.exists())
+			{
+				wordFreqFile.createNewFile();
+			}
+		}
+		catch(Exception ioe)
+		{
+			throw new ResourceInitializationException(ioe);
+		}
+		wordFreqs = new HashMap();
+	}
+
+	/**
+	 * Iterates through all of the WordToken annotations, gets the covered text for each annotation
+	 * and increments the frequency count for that text.  
+	 * 
+	 * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
+	 */
+	public void processCas(CAS cas) throws ResourceProcessException 
+	{
+		try 
+		{
+			JCas jcas;
+			jcas = cas.getJCas();
+			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
+	        Iterator tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
+	        while (tokenItr.hasNext())
+	        {
+	        	WordToken token = (WordToken) tokenItr.next();
+	        	String text = token.getCoveredText();
+	        	if(!wordFreqs.containsKey(text))
+	        	{
+	        		wordFreqs.put(text, new int[1]);
+	        	}
+	        	((int[])wordFreqs.get(text))[0]++;
+	        }	        	
+		}
+		catch(Exception exception)
+		{
+			throw new ResourceProcessException(exception);
+		}
+	}
+
+	/**
+	 * This method sorts the frequency counts and prints out the resulting frequencies in descending
+	 * order to the frequency file in 'word|count' format.
+	 */
+	public void collectionProcessComplete(ProcessTrace arg0) throws ResourceProcessException, IOException 
+	{
+		//sortedFreqs will contain objects of type Object[] of length 2.  The first object in the array
+		//will hold the token and the second the frequency.  We want to sort on the frequency first in 
+		//descending order and token in ascending order for those tokens with the same frequency. 
+		TreeSet sortedFreqs = new TreeSet(
+				new Comparator() {
+					public int compare(Object obj1, Object obj2)
+					{
+						Object[] tokenFreq1 = (Object[]) obj1;
+						Object[] tokenFreq2 = (Object[]) obj2;
+						Integer freq1 = (Integer)tokenFreq1[1];
+						Integer freq2 = (Integer)tokenFreq2[1];
+						if(!freq2.equals(freq1))
+							return freq2.compareTo(freq1);
+						String token1 = (String)tokenFreq1[0];
+						String token2 = (String)tokenFreq2[0];
+						return token1.compareTo(token2); 
+					}
+				});
+		
+		Iterator words = wordFreqs.keySet().iterator();
+		while(words.hasNext())
+		{
+			String word = (String) words.next();
+			int freq = ((int[])(wordFreqs.get(word)))[0];
+			sortedFreqs.add(new Object[] {word,new Integer(freq)});
+		}
+		
+		PrintStream out = new PrintStream(new FileOutputStream(wordFreqFile));
+		Iterator freqs = sortedFreqs.iterator(); 
+		while(freqs.hasNext())
+		{
+			Object[] tokenFreq = (Object[]) freqs.next();
+			String word = (String) tokenFreq[0];
+			int freq = ((Integer)tokenFreq[1]).intValue();
+			out.println(word+"|"+freq);
+		}
+		out.flush();
+		out.close();
+	}
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/uima/core/cc/TokenOffsetsCasConsumer.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,98 +14,98 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.uima.core.cc;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-
-import edu.mayo.bmi.uima.core.type.syntax.BaseToken;
-import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
-
-/**
- * For each CAS a local file with the offsets of the BaseToken annotations is written to a directory specifed by a parameter.
- * The format of the output files is
- * 0|13
- * 17|19
- * 19|20
- * ...
- *   
- * This CAS consumer does not make use of any annotation information in the 
- * cas except for the document id specified the CommonTypeSystem.xml 
- * descriptor and the BaseToken annotations.  The document id will be the 
- * name of the file written for each CAS.  
- * 
- * This CAS consumer was written so that token offsets could be written to 
- * a file.  The offsets were compared to similarly generated annotation offsets
- * from Knowtator annotations.  
- */
-
-public class TokenOffsetsCasConsumer extends CasConsumer_ImplBase {
-
-	public static final String PARAM_OUTPUTDIR = "OutputDirectory";
-
-	File iv_outputDirectory;
-	
-	public void initialize() throws ResourceInitializationException 
-	{
-	    String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
-	    iv_outputDirectory = new File(outputDirectoryName);
-	    if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
-	    	throw new ResourceInitializationException(
-	    			new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
-	}
-	
-	public void processCas(CAS cas) throws ResourceProcessException 
-	{
-		try 
-		{
-			JCas jcas;
-			jcas = cas.getJCas();
-		
-			List offsets = new ArrayList();
-			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-			Iterator tokenItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
-	        while (tokenItr.hasNext())
-	        {
-	        	BaseToken token = (BaseToken) tokenItr.next();
-	        	String offset = ""+token.getBegin()+"|"+token.getEnd();
-	        	offsets.add(offset);
-	        }	        	
-
-	        String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-			writeToFile(documentID, offsets);
-			
-		}
-		catch(Exception e)
-		{
-			throw new ResourceProcessException(e);
-		}
-	}
-	
-	private void writeToFile(String documentID, List offsets) throws IOException
-	{
-		File outputFile = new File(iv_outputDirectory, documentID);
-		outputFile.createNewFile();
-		OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
-		for(int i=0; i<offsets.size(); i++)
-		{
-			String offset = (String) offsets.get(i)+"\n";
-			out.write(offset.getBytes());
-		}
-		out.flush();
-		out.close();
-	}
-}
+package edu.mayo.bmi.uima.core.cc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+
+import edu.mayo.bmi.uima.core.type.syntax.BaseToken;
+import edu.mayo.bmi.uima.core.util.DocumentIDAnnotationUtil;
+
+/**
+ * For each CAS a local file with the offsets of the BaseToken annotations is written to a directory specified by a parameter.
+ * The format of the output files is
+ * 0|13
+ * 17|19
+ * 19|20
+ * ...
+ *   
+ * This CAS consumer does not make use of any annotation information in the 
+ * cas except for the document id specified by the CommonTypeSystem.xml 
+ * descriptor and the BaseToken annotations.  The document id will be the 
+ * name of the file written for each CAS.  
+ * 
+ * This CAS consumer was written so that token offsets could be written to 
+ * a file.  The offsets were compared to similarly generated annotation offsets
+ * from Knowtator annotations.  
+ */
+
+public class TokenOffsetsCasConsumer extends CasConsumer_ImplBase {
+
+	public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+	File iv_outputDirectory;
+	
+	public void initialize() throws ResourceInitializationException 
+	{
+	    String outputDirectoryName = (String)getConfigParameterValue(PARAM_OUTPUTDIR);
+	    iv_outputDirectory = new File(outputDirectoryName);
+	    if(!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory())
+	    	throw new ResourceInitializationException(
+	    			new Exception("Parameter setting 'OutputDirectory' does not point to an existing directory."));
+	}
+	
+	public void processCas(CAS cas) throws ResourceProcessException 
+	{
+		try 
+		{
+			JCas jcas;
+			jcas = cas.getJCas();
+		
+			List offsets = new ArrayList();
+			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
+			Iterator tokenItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
+	        while (tokenItr.hasNext())
+	        {
+	        	BaseToken token = (BaseToken) tokenItr.next();
+	        	String offset = ""+token.getBegin()+"|"+token.getEnd();
+	        	offsets.add(offset);
+	        }	        	
+
+	        String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+			writeToFile(documentID, offsets);
+			
+		}
+		catch(Exception e)
+		{
+			throw new ResourceProcessException(e);
+		}
+	}
+	
+	private void writeToFile(String documentID, List offsets) throws IOException
+	{
+		File outputFile = new File(iv_outputDirectory, documentID);
+		outputFile.createNewFile();
+		OutputStream out = new BufferedOutputStream(new FileOutputStream(outputFile));
+		for(int i=0; i<offsets.size(); i++)
+		{
+			String offset = (String) offsets.get(i)+"\n";
+			out.write(offset.getBytes());
+		}
+		out.flush();
+		out.close();
+	}
+}



Mime
View raw message