ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1489573 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis: EventContextAnalysisConsumer.java EventContextAnalysisPipeline.java
Date Tue, 04 Jun 2013 19:27:18 GMT
Author: dligach
Date: Tue Jun  4 19:27:18 2013
New Revision: 1489573

URL: http://svn.apache.org/r1489573
Log:
moved the consumer inside the pipeline class definition

Removed:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventContextAnalysisConsumer.java
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventContextAnalysisPipeline.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventContextAnalysisPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventContextAnalysisPipeline.java?rev=1489573&r1=1489572&r2=1489573&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventContextAnalysisPipeline.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventContextAnalysisPipeline.java
Tue Jun  4 19:27:18 2013
@@ -18,21 +18,34 @@
  */
 package org.apache.ctakes.temporal.data.analysis;
 
+import java.io.BufferedWriter;
 import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
 import org.apache.ctakes.relationextractor.eval.XMIReader;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.util.Options_ImplBase;
 import org.kohsuke.args4j.Option;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
 import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
 
 /**
+ * Print events and tokens with contexts to two separeate files.
  * 
  * @author dmitriy dligach
  */
@@ -74,7 +87,7 @@ public class EventContextAnalysisPipelin
     CollectionReader collectionReader = getCollectionReader(trainFiles);
 		
     AnalysisEngine annotationConsumer = AnalysisEngineFactory.createPrimitive(
-    		EventContextAnalysisConsumer.class,
+    		EventAndTokenContextWriter.class,
     		"TokenOutputFile",
     		options.tokenOutputFile,
     		"EventOutputFile",
@@ -84,7 +97,104 @@ public class EventContextAnalysisPipelin
     		
 		SimplePipeline.runPipeline(collectionReader, annotationConsumer);
 	}
-	
+
+	/**
+	 * Print all tokens with contexts and all events with contexts for further analysis.
+	 */
+	public static class EventAndTokenContextWriter extends JCasAnnotator_ImplBase {
+
+	  @ConfigurationParameter(
+	      name = "TokenOutputFile",
+	      mandatory = true,
+	      description = "path to the file that stores token contexts")
+	  private String tokenOutputFile;
+
+	  @ConfigurationParameter(
+	      name = "EventOutputFile",
+	      mandatory = true,
+	      description = "path to the file that stores event contexts")
+	  private String eventOutputFile;
+	  
+	  @ConfigurationParameter(
+	      name = "ContextSize",
+	      mandatory = true,
+	      description = "context size in characters")
+	  private int contextSize;
+	  
+	  @Override
+	  public void process(JCas jCas) throws AnalysisEngineProcessException {
+	    
+	    JCas goldView;
+	    try {
+	      goldView = jCas.getView("GoldView");
+	    } catch (CASException e) {
+	      throw new AnalysisEngineProcessException(e);
+	    }
+	    
+	    JCas systemView;
+	    try {
+	      systemView = jCas.getView("_InitialView");
+	    } catch (CASException e) {
+	      throw new AnalysisEngineProcessException(e);
+	    }
+
+	    BufferedWriter tokenWriter = getWriter(tokenOutputFile, true);
+	    BufferedWriter eventWriter = getWriter(eventOutputFile, true);
+	    try {
+	      for(BaseToken baseToken : JCasUtil.select(systemView, BaseToken.class)) {
+	        String tokenText = baseToken.getCoveredText().toLowerCase();
+	        String output = String.format("%s|%s\n", tokenText, getAnnotationContext(baseToken,
contextSize));
+	        
+	        try {
+	          tokenWriter.write(output);
+	        } catch (IOException e) {
+	          throw new AnalysisEngineProcessException(e);
+	        }
+	      } 
+	  
+	      for(EventMention eventMention : JCasUtil.select(goldView, EventMention.class)) {
+	        String eventText = eventMention.getCoveredText().toLowerCase();
+	        String output = String.format("%s|%s\n", eventText, getAnnotationContext(eventMention,
contextSize));
+	        
+	        try {
+	          eventWriter.write(output);
+	        } catch (IOException e) {
+	          throw new AnalysisEngineProcessException(e);
+	        }
+	      }
+	    } finally {
+	      try {
+	        tokenWriter.close();
+	        eventWriter.close();
+	      } catch (IOException e) {
+	        throw new AnalysisEngineProcessException(e);
+	      }
+	    }
+	  }
+	  
+	  private static String getAnnotationContext(Annotation annotation, int maxContextWindowSize)
{
+	    
+	    String text = annotation.getCAS().getDocumentText();
+	    int begin = Math.max(0, annotation.getBegin() - maxContextWindowSize);
+	    int end = Math.min(text.length(), annotation.getEnd() + maxContextWindowSize);
+	    
+	    return text.substring(begin, end).replaceAll("[\r\n]", " ");
+	  }
+	  
+	  private static BufferedWriter getWriter(String filePath, boolean append) {
+
+	    BufferedWriter bufferedWriter = null;
+	    try {
+	      FileWriter fileWriter = new FileWriter(filePath, append);
+	      bufferedWriter = new BufferedWriter(fileWriter);
+	    } catch (IOException e) {
+	      e.printStackTrace();
+	    }
+
+	    return bufferedWriter;
+	  }
+	}
+
   private static CollectionReader getCollectionReader(List<File> inputFiles) throws
Exception {
     
     List<String> fileNames = new ArrayList<String>();



Mime
View raw message