ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1477702 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ ae/feature/ eval/
Date Tue, 30 Apr 2013 15:57:25 GMT
Author: tmill
Date: Tue Apr 30 15:57:24 2013
New Revision: 1477702

URL: http://svn.apache.org/r1477702
Log:
Add in treebank reading and a few evaluation options to temporal relation project.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java?rev=1477702&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMETreebankReader.java
Tue Apr 30 15:57:24 2013
@@ -0,0 +1,265 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.FileUtils;
+import org.apache.uima.util.Level;
+import org.cleartk.syntax.constituent.util.TreebankFormatParser;
+import org.cleartk.util.UIMAUtil;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+public class THYMETreebankReader extends JCasAnnotator_ImplBase {
+
+	public static Logger logger = Logger.getLogger(THYMETreebankReader.class);
+	public static final String TREEBANK_DIRECTORY = "treebankDirectory";
+	private static final Pattern headerPatt = Pattern.compile("\\[(meta|start|end) [^\\]]*?\\]"); //"\\[meta [^\\]]*\\]");
+	
+	@ConfigurationParameter(name = TREEBANK_DIRECTORY, mandatory = true)
+	protected File treebankDirectory;
+	File[] subdirs = null;
+
+	@Override
+	public void initialize(UimaContext aContext) throws ResourceInitializationException {
+		super.initialize(aContext);
+		subdirs = treebankDirectory.listFiles(new FileFilter(){
+			@Override
+			public boolean accept(File pathname) {
+				return pathname.isDirectory() && !pathname.isHidden();
+			}});
+	}
+
+	@Override
+	public void process(JCas jcas) throws AnalysisEngineProcessException {
+		URI uri = ViewURIUtil.getURI(jcas);
+		logger.info("Document id is: " + uri.toString());
+
+		String fn = uri.getPath().substring(uri.getPath().lastIndexOf('/')+1) + ".xml.tree";
+		File treeFile = null;
+		for(File subdir : subdirs){
+			treeFile = new File(subdir, fn);
+			if(treeFile.exists()) break;
+			treeFile = null;
+		}
+
+		if(treeFile == null){
+			this.getContext().getLogger().log(Level.WARNING,"Could not find treeFile: " + fn);
+			// FIXME do automatic parse?
+			return;
+		}
+
+		String tbText;
+
+		try {
+			tbText = FileUtils.file2String(treeFile);
+		} catch (IOException e1) {
+			// shouldn't do automatic parse here -- something wrong with file itself, not the parse
+			throw new AnalysisEngineProcessException(e1);
+		}
+
+		StringBuffer fileText = new StringBuffer(jcas.getDocumentText());
+		
+		// find and replace section headers with whitespace so the TreebankFormatParser skips over them...
+		Matcher m = headerPatt.matcher(fileText);
+		while(m.find()){
+			int headerLen = m.group().length();
+			fileText.replace(m.start(), m.end(), getWhitespaceString(headerLen));
+		}
+		
+		List<org.cleartk.syntax.constituent.util.TopTreebankNode> utilTrees;
+		try {
+			utilTrees = TreebankFormatParser.parseDocument(tbText, 0, fileText.toString());
+		} catch (Exception e) {
+			this.getContext().getLogger().log(Level.WARNING,
+					String.format("Skipping %s due to alignment problems", fn),
+					e);
+			// FIXME - do automatic parse here...
+			
+			return;
+		}
+		
+		// if we get this far, the gold standard exists and we will let it do all of our tokenization.
+		// first we need to remove sentence and token annotations
+		List<Sentence> sents = new ArrayList<Sentence>(JCasUtil.select(jcas, Sentence.class));
+		for(Sentence sent : sents){
+			sent.removeFromIndexes();
+		}
+		List<BaseToken> toks = new ArrayList<BaseToken>(JCasUtil.select(jcas, BaseToken.class));
+		for(BaseToken tok : toks){
+			tok.removeFromIndexes();
+		}
+		
+
+		// add Token, Sentence and TreebankNode annotations for the text
+		for (org.cleartk.syntax.constituent.util.TopTreebankNode utilTree : utilTrees) {
+
+			// create a Sentence and set its parse
+			TopTreebankNode tree = convert(utilTree, jcas);
+			Sentence sentence = new Sentence(jcas, tree.getBegin(), tree.getEnd());
+			sentence.addToIndexes();
+
+			// create the Tokens and add them to the Sentence
+			for (int i = 0; i < tree.getTerminals().size(); i++) {
+				TreebankNode leaf = tree.getTerminals(i);
+				if (leaf.getBegin() != leaf.getEnd()) {
+					BaseToken token = new BaseToken(jcas, leaf.getBegin(), leaf.getEnd());
+					token.setPartOfSpeech(leaf.getNodeType());
+					token.addToIndexes();
+				}
+			}
+		}
+	}
+
+	private static String getWhitespaceString(int headerLen) {
+		char[] chars = new char[headerLen];
+		Arrays.fill(chars, ' ');
+		return new String(chars);
+	}
+
+	// the ctakes syntax typesystem was modeled after cleartk -- as a result, the following methods borrow very liberally from
+	// org.cleartk.syntax.constituent.util.TreebankNodeUtility, which has a convert method for going from
+	// a "normal" tree to a cleartk/uima tree.  This does the same, except goes to a ctakes/uima tree.
+	private static TopTreebankNode convert(org.cleartk.syntax.constituent.util.TopTreebankNode
inTree, JCas jcas){
+		TopTreebankNode outTree = new TopTreebankNode(jcas, inTree.getTextBegin(), inTree.getTextEnd());
+		outTree.setTreebankParse(inTree.getTreebankParse());
+	    convert(inTree, jcas, outTree, null);
+	    initTerminalNodes(outTree, jcas);
+
+
+		outTree.addToIndexes();
+		return outTree;
+	}
+
+	public static void initTerminalNodes(
+			TopTreebankNode uimaNode,
+			JCas jCas) {
+		List<TerminalTreebankNode> terminals = new ArrayList<TerminalTreebankNode>();
+		_initTerminalNodes(uimaNode, terminals);
+
+		for (int i = 0; i < terminals.size(); i++) {
+			TerminalTreebankNode terminal = terminals.get(i);
+			terminal.setIndex(i);
+		}
+
+		FSArray terminalsFSArray = new FSArray(jCas, terminals.size());
+		terminalsFSArray.copyFromArray(
+				terminals.toArray(new FeatureStructure[terminals.size()]),
+				0,
+				0,
+				terminals.size());
+		uimaNode.setTerminals(terminalsFSArray);
+	}
+
+	private static void _initTerminalNodes(
+			TreebankNode node,
+			List<TerminalTreebankNode> terminals) {
+		FSArray children = node.getChildren();
+		for (int i = 0; i < children.size(); i++) {
+			TreebankNode child = (TreebankNode) children.get(i);
+			if (child instanceof TerminalTreebankNode) {
+				terminals.add((TerminalTreebankNode) child);
+			} else
+				_initTerminalNodes(child, terminals);
+		}
+	}
+
+	public static TreebankNode convert(
+			org.cleartk.syntax.constituent.util.TreebankNode pojoNode,
+			JCas jCas,
+			TreebankNode uimaNode,
+			TreebankNode parentNode) {
+		uimaNode.setNodeType(pojoNode.getType());
+		uimaNode.setNodeTags(UIMAUtil.toStringArray(jCas, pojoNode.getTags()));
+		uimaNode.setNodeValue(pojoNode.getValue());
+		uimaNode.setLeaf(pojoNode.isLeaf());
+		uimaNode.setParent(parentNode);
+
+		List<TreebankNode> uimaChildren = new ArrayList<TreebankNode>();
+		for (org.cleartk.syntax.constituent.util.TreebankNode child : pojoNode.getChildren()) {
+			TreebankNode childNode;
+			if (child.isLeaf()) {
+				childNode = new TerminalTreebankNode(jCas, child.getTextBegin(), child.getTextEnd());
+			} else {
+				childNode = new TreebankNode(
+						jCas,
+						child.getTextBegin(),
+						child.getTextEnd());
+			}
+			uimaChildren.add(convert(child, jCas, childNode, uimaNode));
+			childNode.addToIndexes();
+		}
+		FSArray uimaChildrenFSArray = new FSArray(jCas, uimaChildren.size());
+		uimaChildrenFSArray.copyFromArray(
+				uimaChildren.toArray(new FeatureStructure[uimaChildren.size()]),
+				0,
+				0,
+				uimaChildren.size());
+		uimaNode.setChildren(uimaChildrenFSArray);
+		return uimaNode;
+	}
+
+	public static AnalysisEngineDescription getDescription(File treebankDirectory)
+			throws ResourceInitializationException {
+		return AnalysisEngineFactory.createPrimitiveDescription(
+				THYMETreebankReader.class,
+				THYMETreebankReader.TREEBANK_DIRECTORY,
+				treebankDirectory);
+	}
+	
+	public static void main(String[] args){
+		String testString = "[meta rev_date=\"02/20/2010\" start_date=\"02/20/2010\" rev=\"0002\"]\n\n"
+
+						    "[start section id=\"20112\"]\n\n" +
+						    "#1 Dilated esophagus on CT-scan\n" +
+						    "#2 Adenocarcinoma right colon\n" +
+						    "#3 Symptomatic anemia\n" +
+						    "#4 Hypothyroidism";
+		Matcher m = headerPatt.matcher(testString);
+//		System.out.println("Matches = " + m.matches());
+		
+		while(m.find()){
+			System.out.println("FOund match at: " + m.start() + "-" + m.end());
+		}
+	}
+}

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeFlatTreeFeatureExtractor.java
Tue Apr 30 15:57:24 2013
@@ -32,10 +32,10 @@ public class EventTimeFlatTreeFeatureExt
 		}
 		
 		SimpleTree bopTree = getTree(jcas, arg1, arg2, "BOP", new Function<BaseToken,String>(){public
String apply(BaseToken t){ return t.getPartOfSpeech();}});
-//		SimpleTree bowTree = getTree(jcas, arg1, arg2, "BOW", new Function<BaseToken,String>(){public
String apply(BaseToken t){ return t.getCoveredText();}});
+		SimpleTree bowTree = getTree(jcas, arg1, arg2, "BOW", new Function<BaseToken,String>(){public
String apply(BaseToken t){ return t.getCoveredText();}});
 		
 		feats.add(new Feature("TK_BOP", bopTree.toString()));
-//		feats.add(new Feature("TK_BOW", bowTree.toString()));
+		feats.add(new Feature("TK_BOW", bowTree.toString()));
 		return feats;
 	}
 

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TemporalPETExtractor.java
Tue Apr 30 15:57:24 2013
@@ -79,7 +79,7 @@ public class TemporalPETExtractor implem
 		return features;
 	}
 
-	private static void addOtherTimes(JCas jcas, TopTreebankNode root, IdentifiedAnnotation
arg1, IdentifiedAnnotation arg2) {
+	public static void addOtherTimes(JCas jcas, TopTreebankNode root, IdentifiedAnnotation arg1,
IdentifiedAnnotation arg2) {
 		List<TimeMention> timexes = JCasUtil.selectCovered(TimeMention.class, root);
 		
 		for(TimeMention timex : timexes){

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
Tue Apr 30 15:57:24 2013
@@ -73,7 +73,7 @@ public abstract class EvaluationOfAnnota
       File knowtatorXMLDirectory,
       File xmiDirectory,
       Class<? extends Annotation> annotationClass) {
-    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, null, xmiDirectory);
     this.annotationClass = annotationClass;
   }
   

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
Tue Apr 30 15:57:24 2013
@@ -89,7 +89,7 @@ public class EvaluationOfEventProperties
       File rawTextDirectory,
       File knowtatorXMLDirectory,
       File xmiDirectory) {
-    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, null, xmiDirectory);
     for (String name : PROPERTY_NAMES) {
       this.loggers.put(name, Logger.getLogger(String.format("%s.%s", this.getClass().getName(),
name)));
     }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
Tue Apr 30 15:57:24 2013
@@ -19,6 +19,7 @@
 package org.apache.ctakes.temporal.eval;
 
 import java.io.File;
+import java.net.URI;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
@@ -28,6 +29,7 @@ import java.util.Set;
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
 import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -38,9 +40,11 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.util.ViewURIUtil;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AggregateBuilder;
@@ -54,34 +58,60 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
 
 public class EvaluationOfTemporalRelations extends
     Evaluation_ImplBase<AnnotationStatistics<String>> {
+	
+  static interface TempRelOptions extends Evaluation_ImplBase.Options{
+    @Option
+    public boolean getTest();
+
+    @Option
+    public boolean getPrintFormattedRelations();
+  }
 
   public static void main(String[] args) throws Exception {
-    Options options = CliFactory.parseArguments(Options.class, args);
+    TempRelOptions options = CliFactory.parseArguments(TempRelOptions.class, args);
     List<Integer> patientSets = options.getPatients().getList();
     List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
     List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+    List<Integer> testItems = THYMEData.getTestPatientSets(patientSets);
+    
     EvaluationOfTemporalRelations evaluation = new EvaluationOfTemporalRelations(
         new File("target/eval/temporal-relations"),
         options.getRawTextDirectory(),
         options.getKnowtatorXMLDirectory(),
         options.getXMIDirectory(),
-        options.getPrintErrors());
+        options.getTreebankDirectory(),
+        options.getPrintErrors(),
+        options.getPrintFormattedRelations());
     evaluation.prepareXMIsFor(patientSets);
-    AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
+    List<Integer> training = trainItems;
+    List<Integer> testing = null;
+    if(options.getTest()){
+      training.addAll(devItems);
+      testing = testItems;
+    }else{
+      testing = devItems;
+    }    
+    AnnotationStatistics<String> stats = evaluation.trainAndTest(training, testing);
     System.err.println(stats);
   }
 
+  protected boolean printRelations = false;
+  
   public EvaluationOfTemporalRelations(
       File baseDirectory,
       File rawTextDirectory,
       File knowtatorXMLDirectory,
       File xmiDirectory,
-      boolean printErrors) {
-    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
+      File treebankDirectory,
+      boolean printErrors,
+      boolean printRelations) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, treebankDirectory);
     this.printErrors = printErrors;
+    this.printRelations = printRelations;
   }
 
   @Override
@@ -90,6 +120,7 @@ public class EvaluationOfTemporalRelatio
     aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class,
BinaryTextRelation.class));
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class));
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCrossSentenceRelations.class));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveEventEventRelations.class));
     aggregateBuilder.add(EventTimeRelationAnnotator.createDataWriterDescription(
         LIBSVMStringOutcomeDataWriter.class,
         directory,
@@ -111,6 +142,10 @@ public class EvaluationOfTemporalRelatio
         RemoveCrossSentenceRelations.class,
         RemoveCrossSentenceRelations.PARAM_RELATION_VIEW,
         GOLD_VIEW_NAME));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+    		RemoveEventEventRelations.class,
+    		RemoveEventEventRelations.PARAM_RELATION_VIEW,
+    		GOLD_VIEW_NAME));
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveRelations.class));
     aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(directory));
 
@@ -132,7 +167,12 @@ public class EvaluationOfTemporalRelatio
           systemView,
           BinaryTextRelation.class);
       stats.add(goldRelations, systemRelations, getSpan, getOutcome);
-      
+      if(this.printRelations){
+    	  URI uri = ViewURIUtil.getURI(jCas);
+    	  String[] path = uri.getPath().split("/");
+    	  printRelationAnnotations(path[path.length - 1], systemRelations);
+      }
+
       if(this.printErrors){
     	  Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
     	  for (BinaryTextRelation relation : goldRelations) {
@@ -180,6 +220,60 @@ public class EvaluationOfTemporalRelatio
 			  text.substring(begin, end).replaceAll("[\r\n]", " "));
   }
 
+  private static void printRelationAnnotations(String fileName, Collection<BinaryTextRelation>
relations) {
+
+	  for(BinaryTextRelation binaryTextRelation : relations) {
+
+		  Annotation arg1 = binaryTextRelation.getArg1().getArgument();
+		  Annotation arg2 = binaryTextRelation.getArg2().getArgument();
+
+		  String arg1Type = arg1.getClass().getSimpleName();
+		  String arg2Type = arg2.getClass().getSimpleName();
+
+		  int arg1Begin = arg1.getBegin();
+		  int arg1End = arg1.getEnd();
+		  int arg2Begin = arg2.getBegin();
+		  int arg2End = arg2.getEnd();
+
+		  String category = binaryTextRelation.getCategory();
+
+		  System.out.format("%s\t%s\t%s\t%d\t%d\t%s\t%d\t%d\n", 
+				  fileName, category, arg1Type, arg1Begin, arg1End, arg2Type, arg2Begin, arg2End);
+	  }
+  }
+
+  public static class RemoveEventEventRelations extends JCasAnnotator_ImplBase {
+
+	  public static final String PARAM_RELATION_VIEW = "RelationView";
+
+	  @ConfigurationParameter(name = PARAM_RELATION_VIEW)
+	  private String relationViewName = CAS.NAME_DEFAULT_SOFA;
+	  @Override
+	  public void process(JCas jCas) throws AnalysisEngineProcessException {
+		  JCas relationView;
+		  try {
+			  relationView = jCas.getView(this.relationViewName);
+		  } catch (CASException e) {
+			  throw new AnalysisEngineProcessException(e);
+		  }
+
+		  for(BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(relationView, BinaryTextRelation.class))){
+			  if(relation.getCategory().equals("CONTAINS")){
+				  RelationArgument arg1 = relation.getArg1();
+				  RelationArgument arg2 = relation.getArg2();
+				  if(arg1.getArgument() instanceof TimeMention && arg2.getArgument() instanceof
EventMention ||
+						  arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof
TimeMention){
+					  // these are the kind we keep.
+					  continue;
+				  }
+				  arg1.removeFromIndexes();
+				  arg2.removeFromIndexes();
+				  relation.removeFromIndexes();
+			  }
+		  }
+	  }	  
+  }
+
   public static class RemoveNonTLINKRelations extends JCasAnnotator_ImplBase {
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -239,6 +333,32 @@ public class EvaluationOfTemporalRelatio
     }
   }
 
+  public static class RemoveNonContainsRelations extends JCasAnnotator_ImplBase {
+	  public static final String PARAM_RELATION_VIEW = "RelationView";
+
+	  @ConfigurationParameter(name = PARAM_RELATION_VIEW)
+	  private String relationViewName = CAS.NAME_DEFAULT_SOFA;
+
+	  @Override
+	  public void process(JCas jCas) throws AnalysisEngineProcessException {
+		  JCas relationView;
+		  try {
+			  relationView = jCas.getView(this.relationViewName);
+		  } catch (CASException e) {
+			  throw new AnalysisEngineProcessException(e);
+		  }
+		  for (BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(
+				  relationView,
+				  BinaryTextRelation.class))) {
+			  if (!relation.getCategory().startsWith("CONTAINS")) {
+				  relation.getArg1().removeFromIndexes();
+				  relation.getArg2().removeFromIndexes();
+				  relation.removeFromIndexes();
+			  }
+		  }
+	  }	  
+  }
+
   public static class RemoveRelations extends JCasAnnotator_ImplBase {
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1477702&r1=1477701&r2=1477702&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Tue Apr 30 15:57:24 2013
@@ -32,10 +32,12 @@ import java.util.regex.Pattern;
 import org.apache.ctakes.chunker.ae.Chunker;
 import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
 import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
 import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
 import org.apache.ctakes.core.ae.OverlapAnnotator;
 import org.apache.ctakes.core.ae.SentenceDetector;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.resource.FileResourceImpl;
 import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
 import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
@@ -47,6 +49,7 @@ import org.apache.ctakes.lvg.ae.LvgAnnot
 import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.temporal.ae.THYMEKnowtatorXMLReader;
+import org.apache.ctakes.temporal.ae.THYMETreebankReader;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
@@ -109,9 +112,21 @@ public abstract class Evaluation_ImplBas
     @Option(longName = "patients")
     public CommandLine.IntegerRanges getPatients();
     
-    @Option(longName = "print-errors", defaultValue="false")
+    @Option(longName = "treebank", defaultToNull=true)
+    public File getTreebankDirectory();
+    
+    @Option
+    public boolean getGrid();
+    
+    @Option
     public boolean getPrintErrors();
-  }
+    
+    @Option
+    public boolean getMergeOverlap();
+    
+    @Option(longName = "kernelParams", defaultToNull=true)
+    public String getKernelParams();
+}
 
   protected File rawTextDirectory;
 
@@ -121,18 +136,24 @@ public abstract class Evaluation_ImplBas
 
   private boolean xmiExists;
 
-  protected boolean printErrors;
+  protected File treebankDirectory;
+  
+  protected boolean printErrors = false;
+  
+  protected String[] kernelParams;
   
   public Evaluation_ImplBase(
       File baseDirectory,
       File rawTextDirectory,
       File knowtatorXMLDirectory,
-      File xmiDirectory) {
+      File xmiDirectory,
+      File treebankDirectory) {
     super(baseDirectory);
     this.rawTextDirectory = rawTextDirectory;
     this.knowtatorXMLDirectory = knowtatorXMLDirectory;
     this.xmiDirectory = xmiDirectory;
     this.xmiExists = this.xmiDirectory.exists() && this.xmiDirectory.listFiles().length
> 0;
+    this.treebankDirectory = treebankDirectory;
   }
 
   public void prepareXMIsFor(List<Integer> patientSets) throws Exception {
@@ -221,7 +242,7 @@ public abstract class Evaluation_ImplBas
         "MaxentModel",
         ExternalResourceFactory.createExternalResourceDescription(
             SuffixMaxentModelResourceImpl.class,
-            SentenceDetector.class.getResource("../sentdetect/sdmed.mod"))));
+            FileLocator.locateFile("org/apache/ctakes/core/sentdetect/sdmed.mod").toURI().toURL())));
     // identify tokens
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
     // merge some tokens
@@ -243,7 +264,7 @@ public abstract class Evaluation_ImplBas
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
         Chunker.class,
         Chunker.CHUNKER_MODEL_FILE_PARAM,
-        Chunker.class.getResource("../models/chunk-model.claims-1.5.zip").toURI().getPath(),
+        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunk-model.claims-1.5.zip"),
         Chunker.CHUNKER_CREATOR_CLASS_PARAM,
         DefaultChunkCreator.class));
 
@@ -385,6 +406,13 @@ public abstract class Evaluation_ImplBas
     // add semantic role labeler
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
 
+    // add constituency parser (or gold standard treebank if we have it)
+    if(this.treebankDirectory != null){
+    	aggregateBuilder.add(THYMETreebankReader.getDescription(this.treebankDirectory));
+    }else{
+    	aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
+    }
+
     // write out the CAS after all the above annotations
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
         XMIWriter.class,



Mime
View raw message