ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject svn commit: r1577303 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ ae/feature/treekernel/ eval/
Date Thu, 13 Mar 2014 20:03:10 GMT
Author: clin
Date: Thu Mar 13 20:03:10 2014
New Revision: 1577303

URL: http://svn.apache.org/r1577303
Log:
Create a tree-kernel version of the DocTimeRel evaluation code, which calls DocTimeRelWithTreeAnnotator.
Tried deriving a single constituent tree and a simple dependency tree.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java
  (with props)
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java
  (with props)
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java
  (with props)
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java
  (with props)

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java?rev=1577303&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java
Thu Mar 13 20:03:10 2014
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor;
+import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventPropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseXExtractor;
+import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXExtractor;
+import org.apache.ctakes.temporal.ae.feature.treekernel.DependencySingleTreeExtractor;
+import org.apache.ctakes.temporal.ae.feature.treekernel.SyntaticSingleTreeExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * ClearTK annotator for the DocTimeRel property of events.
+ *
+ * For every {@link EventMention} that has an associated event, features are gathered from
+ * the configured extractors. In training mode an instance labelled with the event's current
+ * DocTimeRel value is written to the data writer; otherwise the features are classified and
+ * the predicted value is stored on the event's properties.
+ */
+public class DocTimeRelWithTreeAnnotator extends CleartkAnnotator<String> {
+
+  /**
+   * Creates a description of this annotator configured for training.
+   *
+   * @param dataWriterClass the data writer implementation used to write training instances
+   * @param outputDirectory the directory handed to the data writer factory
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        DocTimeRelWithTreeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING, true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory);
+  }
+
+  /**
+   * Creates a description of this annotator configured for classification.
+   *
+   * @param modelDirectory the directory that contains {@code model.jar}
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    File modelJar = new File(modelDirectory, "model.jar");
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        DocTimeRelWithTreeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING, false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelJar);
+  }
+
+  // Feature extractors; all of them are created in initialize(UimaContext).
+  private CleartkExtractor contextExtractor;
+  private NearbyVerbTenseXExtractor verbTensePatternExtractor;
+  private SectionHeaderExtractor sectionIDExtractor;
+  private ClosestVerbExtractor closestVerbExtractor;
+  private TimeXExtractor timeXExtractor;
+  private EventPropertyExtractor genericExtractor;
+  private DateAndMeasurementExtractor dateExtractor;
+  private DependencySingleTreeExtractor depTreeExtractor;
+  private SyntaticSingleTreeExtractor singleTreeExtractor;
+
+  /**
+   * Sets up the token-context extractor and the remaining feature extractors.
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+
+    // Covered text plus part of speech of each token: the three tokens before the
+    // annotation, the tokens it covers, and the three tokens after it.
+    CombinedExtractor tokenFeatures = new CombinedExtractor(
+        new CoveredTextExtractor(),
+        new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+    this.contextExtractor = new CleartkExtractor(
+        BaseToken.class, tokenFeatures, new Preceding(3), new Covered(), new Following(3));
+
+    this.verbTensePatternExtractor = new NearbyVerbTenseXExtractor();
+    this.sectionIDExtractor = new SectionHeaderExtractor();
+    this.closestVerbExtractor = new ClosestVerbExtractor();
+    this.timeXExtractor = new TimeXExtractor();
+    this.genericExtractor = new EventPropertyExtractor();
+    this.dateExtractor = new DateAndMeasurementExtractor();
+    this.depTreeExtractor = new DependencySingleTreeExtractor();
+    this.singleTreeExtractor = new SyntaticSingleTreeExtractor();
+  }
+
+  /**
+   * Writes a training instance, or sets the predicted DocTimeRel, for every event mention
+   * that has an event attached.
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    for (EventMention mention : JCasUtil.select(jCas, EventMention.class)) {
+      if (mention.getEvent() == null) {
+        continue; // nothing to read the label from or to store the prediction on
+      }
+      List<Feature> features = this.collectFeatures(jCas, mention);
+      if (!this.isTraining()) {
+        String predicted = this.classifier.classify(features);
+        mention.getEvent().getProperties().setDocTimeRel(predicted);
+      } else {
+        String gold = mention.getEvent().getProperties().getDocTimeRel();
+        this.dataWriter.write(new Instance<String>(gold, features));
+      }
+    }
+  }
+
+  /**
+   * Runs every extractor on the mention, in a fixed order, and returns the combined features.
+   */
+  private List<Feature> collectFeatures(JCas jCas, EventMention mention)
+      throws AnalysisEngineProcessException {
+    List<Feature> features = this.contextExtractor.extract(jCas, mention);
+    // nearby verb POS pattern
+    features.addAll(this.verbTensePatternExtractor.extract(jCas, mention));
+    // section heading
+    features.addAll(this.sectionIDExtractor.extract(jCas, mention));
+    // closest verb
+    features.addAll(this.closestVerbExtractor.extract(jCas, mention));
+    // closest time expression types
+    features.addAll(this.timeXExtractor.extract(jCas, mention));
+    // features from EventPropertyExtractor
+    features.addAll(this.genericExtractor.extract(jCas, mention));
+    // features from DateAndMeasurementExtractor
+    features.addAll(this.dateExtractor.extract(jCas, mention));
+    // dependency tree feature
+    features.addAll(this.depTreeExtractor.extract(jCas, mention));
+    // the single constituency tree that covers the event
+    features.addAll(this.singleTreeExtractor.extract(jCas, mention));
+    return features;
+  }
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java?rev=1577303&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java
Thu Mar 13 20:03:10 2014
@@ -0,0 +1,105 @@
+package org.apache.ctakes.temporal.ae.feature.treekernel;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.dependency.parser.util.AnnotationDepUtils;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.TreeFeature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+/**
+ * Given a focus annotation, builds a tree-kernel feature from the chain of dependency heads
+ * above the first dependency node covered by that annotation.
+ *
+ * Every node on the chain contributes a {@code (deprel postag)} subtree. The subtree of a
+ * dependent is attached beneath the part-of-speech node of its head, so the outermost
+ * bracket belongs to the highest head whose own head is non-null.
+ *
+ * @author CH151862
+ */
+public class DependencySingleTreeExtractor implements SimpleFeatureExtractor {
+
+	public static final String FEAT_NAME = "TK_DepSingleT";
+
+	/**
+	 * Extracts a single {@link TreeFeature} named {@link #FEAT_NAME}.
+	 *
+	 * Only the first covered dependency node that has a head is used. If there is no such
+	 * node, the feature falls back to {@code (TOP (EVENT <covered text>))}.
+	 *
+	 * @param view the CAS view that holds the dependency nodes
+	 * @param focusAnnotation the annotation whose covered dependency nodes are inspected
+	 * @return a list that contains exactly one tree feature
+	 */
+	@Override
+	public List<Feature> extract(JCas view, Annotation focusAnnotation)
+			throws CleartkExtractorException {
+		List<Feature> features = new ArrayList<Feature>();
+		// Fallback tree, kept when no covered node has a head.
+		String dtreeStr = "(TOP (EVENT " + focusAnnotation.getCoveredText().trim() + "))";
+
+		for (ConllDependencyNode node : JCasUtil.selectCovered(view, ConllDependencyNode.class, focusAnnotation)) {
+			if (node.getHead() == null) {
+				// A node without a head is treated as the root and cannot start a chain.
+				continue;
+			}
+			dtreeStr = buildHeadChain(node);
+			break;
+		}
+
+		features.add(new TreeFeature(FEAT_NAME, dtreeStr));
+		return features;
+	}
+
+	/**
+	 * Builds the bracketed tree string for a node whose head is non-null.
+	 *
+	 * @param node a dependency node with a non-null head
+	 * @return the nested {@code (deprel postag)} chain, or {@code (deprel (postag null))} when
+	 *         the head of the node has no head itself
+	 */
+	private static String buildHeadChain(ConllDependencyNode node) {
+		ConllDependencyNode head = node.getHead();
+		if (head.getHead() == null) {
+			// The head is already the root: there is nothing to nest under.
+			return SimpleTree.fromString(
+					String.format("(%s (%s %s))", node.getDeprel(), node.getPostag(), "null")).toString();
+		}
+
+		SimpleTree curTree = SimpleTree.fromString(String.format("(%s %s)", node.getDeprel(), node.getPostag()));
+		SimpleTree headTree = null;
+		while (head.getHead() != null) { // while the head node is not the root
+			headTree = SimpleTree.fromString(String.format("(%s %s)", head.getDeprel(), head.getPostag()));
+			// Hang the tree built so far beneath the POS node of its head.
+			curTree.parent = headTree.children.get(0);
+			headTree.children.get(0).addChild(curTree);
+			curTree = headTree;
+			head = head.getHead();
+		}
+		return headTree.toString();
+	}
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java?rev=1577303&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java
Thu Mar 13 20:03:10 2014
@@ -0,0 +1,58 @@
+package org.apache.ctakes.temporal.ae.feature.treekernel;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.TreeFeature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+/**
+ * Given a focus annotation, produces a tree-kernel feature from a copy of the parse tree
+ * that {@code AnnotationTreeUtils.getAnnotationTree} returns for that annotation.
+ * @author CH151862
+ *
+ */
+public class SyntaticSingleTreeExtractor implements SimpleFeatureExtractor {
+
+	public static final String FEAT_NAME = "TK_SingleT";
+
+	/**
+	 * Extracts a single {@link TreeFeature} named {@link #FEAT_NAME}.
+	 *
+	 * When no tree is available a placeholder tree is emitted. For an {@link EventMention}
+	 * a node labelled {@code EVENT-<contextual modality>} is inserted into the tree copy
+	 * before it is converted.
+	 *
+	 * @param view the CAS view that holds the parse
+	 * @param focusAnnotation the annotation to build the tree for
+	 * @return a list that contains exactly one tree feature
+	 */
+	@Override
+	public List<Feature> extract(JCas view, Annotation focusAnnotation)
+			throws CleartkExtractorException {
+		List<Feature> result = new ArrayList<Feature>();
+
+		// Work on a copy so that inserting the event node leaves the original parse untouched.
+		TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(
+				view, AnnotationTreeUtils.getAnnotationTree(view, focusAnnotation));
+
+		if (root == null) {
+			// No tree for this annotation: emit a placeholder.
+			result.add(new TreeFeature(FEAT_NAME, new SimpleTree("(S (NN null))").toString()));
+			return result;
+		}
+
+		if (focusAnnotation instanceof EventMention) {
+			EventMention mention = (EventMention) focusAnnotation;
+			String modality = mention.getEvent().getProperties().getContextualModality();
+			AnnotationTreeUtils.insertAnnotationNode(view, root, focusAnnotation, "EVENT-" + modality);
+		}
+
+		SimpleTree converted = TreeExtractor.getSimpleClone(root);
+		TemporalPETExtractor.moveTimexDownToNP(converted);
+
+		result.add(new TreeFeature(FEAT_NAME, converted.toString()));
+		return result;
+	}
+
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java?rev=1577303&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java
Thu Mar 13 20:03:10 2014
@@ -0,0 +1,330 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
+//import org.apache.ctakes.temporal.ae.ContextualModalityAnnotator;
+import org.apache.ctakes.temporal.ae.DocTimeRelWithTreeAnnotator;
+import org.apache.ctakes.temporal.eval.EvaluationOfEventTimeRelations.ParameterSettings;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.FileUtils;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+//import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.classifier.tksvmlight.model.CompositeKernel.ComboOperator;
+//import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.ml.libsvm.tk.TKLIBSVMStringOutcomeDataWriter;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+//import org.uimafit.testing.util.HideOutput;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Maps;
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+/**
+ * Evaluation of event properties using a tree-kernel classifier.
+ *
+ * Training writes DocTimeRel instances with {@link DocTimeRelWithTreeAnnotator} and packages
+ * the model. Testing clears the event properties copied from the gold view, re-predicts
+ * DocTimeRel and collects {@link AnnotationStatistics} for every name in
+ * {@code PROPERTY_NAMES}. Only the DocTimeRel annotator is part of the pipelines here; no
+ * contextual-modality annotator is added, although statistics are still collected for it.
+ */
+public class EvaluationOfEventPropertiesTk extends
+Evaluation_ImplBase<Map<String, AnnotationStatistics<String>>> {
+	/** Command-line options in addition to those of {@link Evaluation_ImplBase.Options}. */
+	static interface TempRelOptions extends Evaluation_ImplBase.Options{
+		@Option
+		public boolean getPrintFormattedRelations();
+
+		@Option
+		public boolean getBaseline();
+
+		@Option
+		public boolean getClosure();
+
+		@Option
+		public boolean getUseTmp();
+
+		@Option
+		public boolean getUseGoldAttributes();
+	}
+
+	// Base names of the EventProperties features being evaluated; also used as model sub-directories.
+	private static final String DOC_TIME_REL = "docTimeRel";
+	private static final String CONTEXTUAL_MODALITY = "contextualModality";
+
+	private static final List<String> PROPERTY_NAMES = Arrays.asList(DOC_TIME_REL, CONTEXTUAL_MODALITY);
+
+	protected static boolean DEFAULT_BOTH_DIRECTIONS = false;
+	protected static float DEFAULT_DOWNSAMPLE = 1.0f;
+	protected static ParameterSettings allParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, DEFAULT_DOWNSAMPLE, "tk",
+			10.0, 1.0, "polynomial", ComboOperator.SUM, 1, 0.4);
+
+	/**
+	 * Runs the evaluation and prints the statistics of every property to standard error.
+	 *
+	 * Training uses the train sets. With the test option the dev sets are added to training
+	 * and the test sets are evaluated; otherwise the dev sets are evaluated.
+	 */
+	public static void main(String[] args) throws Exception {
+		ParameterSettings params = allParams;
+		TempRelOptions options = CliFactory.parseArguments(TempRelOptions.class, args);
+		List<Integer> patientSets = options.getPatients().getList();
+		List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+		List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
+		List<Integer> testItems = THYMEData.getTestPatientSets(patientSets);
+
+		try{
+			File workingDir = new File("target/eval/event-properties");
+			if(!workingDir.exists()) workingDir.mkdirs();
+			if(options.getUseTmp()){
+				// createTempFile only reserves a unique name; swap the file for a directory of that name.
+				File tempModelDir = File.createTempFile("temporal", null, workingDir);
+				tempModelDir.delete();
+				tempModelDir.mkdir();
+				workingDir = tempModelDir;
+			}
+
+			EvaluationOfEventPropertiesTk evaluation = new EvaluationOfEventPropertiesTk(
+					workingDir,
+					options.getRawTextDirectory(),
+					options.getXMLDirectory(),
+					options.getXMLFormat(),
+					options.getXMIDirectory(),
+					options.getKernelParams(),
+					params);
+			evaluation.prepareXMIsFor(patientSets);
+			evaluation.logClassificationErrors(workingDir, "ctakes-event-property-errors");
+
+			List<Integer> training = trainItems;
+			List<Integer> testing = null;
+			if(options.getTest()){
+				training.addAll(devItems);
+				testing = testItems;
+			}else{
+				testing = devItems;
+			}
+			Map<String, AnnotationStatistics<String>> stats = evaluation.trainAndTest(training, testing);
+
+			for (String name : PROPERTY_NAMES) {
+				System.err.println("====================");
+				System.err.println(name);
+				System.err.println("--------------------");
+				System.err.println(stats.get(name));
+			}
+			if(options.getUseTmp()){
+				// won't work because it's not empty. should we be concerned with this or is it responsibility of
+				// person invoking the tmp flag?
+				FileUtils.deleteRecursive(workingDir);
+			}
+		}catch(ResourceInitializationException e){
+			System.err.println("Error with parameter settings: " + params);
+			e.printStackTrace();
+		}
+	}
+
+	// One logger per evaluated property, keyed by property name.
+	private Map<String, Logger> loggers = Maps.newHashMap();
+	protected ParameterSettings params = null;
+
+	/**
+	 * @param kernelParams space-separated classifier options without the leading dashes on
+	 *        the option names, or {@code null} to derive the options from {@code params}
+	 * @param params the settings used to build the classifier options when
+	 *        {@code kernelParams} is {@code null}
+	 */
+	public EvaluationOfEventPropertiesTk(
+			File baseDirectory,
+			File rawTextDirectory,
+			File xmlDirectory,
+			XMLFormat xmlFormat,
+			File xmiDirectory,
+			String kernelParams,
+			ParameterSettings params) {
+		super(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat, xmiDirectory, null);
+		this.params = params;
+		this.kernelParams = kernelParams == null ? null : kernelParams.split(" ");
+		for (String name : PROPERTY_NAMES) {
+			this.loggers.put(name, Logger.getLogger(String.format("%s.%s", this.getClass().getName(), name)));
+		}
+	}
+
+	/**
+	 * Writes DocTimeRel training data for the documents of the reader and packages the model
+	 * in the {@code docTimeRel} sub-directory of {@code directory}.
+	 */
+	@Override
+	protected void train(CollectionReader collectionReader, File directory) throws Exception {
+		AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+		aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
+		aggregateBuilder.add(CopyFromGold.getDescription(TimeMention.class));
+		aggregateBuilder.add(DocTimeRelWithTreeAnnotator.createDataWriterDescription(
+				TKLIBSVMStringOutcomeDataWriter.class,
+				new File(directory, DOC_TIME_REL)));
+		SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+
+		String[] optArray;
+
+		if(this.kernelParams == null){
+			List<String> svmOptions = new ArrayList<String>();
+			svmOptions.add("-c"); svmOptions.add(""+params.svmCost);        // svm cost
+			svmOptions.add("-t"); svmOptions.add(""+params.svmKernelIndex); // kernel index
+			svmOptions.add("-d"); svmOptions.add("3");                      // degree parameter for polynomial
+			svmOptions.add("-g"); svmOptions.add(""+params.svmGamma);
+			if(params.svmKernelIndex==ParameterSettings.SVM_KERNELS.indexOf("tk")){
+				svmOptions.add("-S"); svmOptions.add(""+params.secondKernelIndex);   // second kernel index (similar to -t) for composite kernel
+				String comboFlag = (params.comboOperator == ComboOperator.SUM ? "+"
+						: params.comboOperator == ComboOperator.PRODUCT ? "*"
+						: params.comboOperator == ComboOperator.TREE_ONLY ? "T" : "V");
+				svmOptions.add("-C"); svmOptions.add(comboFlag);
+				svmOptions.add("-L"); svmOptions.add(""+params.lambda);
+				svmOptions.add("-T"); svmOptions.add(""+params.tkWeight);
+				svmOptions.add("-N"); svmOptions.add("3");   // normalize trees and features
+			}
+			optArray = svmOptions.toArray(new String[]{});
+		}else{
+			// Prefix the option names on a copy: modifying this.kernelParams itself would add
+			// another "-" every time train() is called again on this instance.
+			optArray = this.kernelParams.clone();
+			for(int i = 0; i < optArray.length; i+=2){
+				optArray[i] = "-" + optArray[i];
+			}
+		}
+
+		JarClassifierBuilder.trainAndPackage(new File(directory,DOC_TIME_REL), optArray);
+	}
+
+	/**
+	 * Re-predicts DocTimeRel for the gold event mentions and compares every property in
+	 * {@code PROPERTY_NAMES} against the gold view, segment by segment.
+	 *
+	 * @return the statistics per property name
+	 */
+	@Override
+	protected Map<String, AnnotationStatistics<String>> test(
+			CollectionReader collectionReader,
+			File directory) throws Exception {
+		AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+		aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
+		aggregateBuilder.add(CopyFromGold.getDescription(TimeMention.class));
+		// Wipe the property values before the annotator predicts them again.
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearEventProperties.class));
+		aggregateBuilder.add(DocTimeRelWithTreeAnnotator.createAnnotatorDescription(new File(directory, DOC_TIME_REL)));
+
+		Function<EventMention, ?> eventMentionToSpan = AnnotationStatistics.annotationToSpan();
+		Map<String, Function<EventMention, String>> propertyGetters;
+		propertyGetters = new HashMap<String, Function<EventMention, String>>();
+		for (String name : PROPERTY_NAMES) {
+			propertyGetters.put(name, getPropertyGetter(name));
+		}
+
+		Map<String, AnnotationStatistics<String>> statsMap = new HashMap<String, AnnotationStatistics<String>>();
+
+		for(String propertyName : PROPERTY_NAMES){
+			statsMap.put(propertyName, new AnnotationStatistics<String>());
+		}
+
+		for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+			JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+			JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+			String text = goldView.getDocumentText();
+			for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+				if (!THYMEData.SEGMENTS_TO_SKIP.contains(segment.getId())) {
+					List<EventMention> goldEvents = selectExact(goldView, EventMention.class, segment);
+					List<EventMention> systemEvents = selectExact(systemView, EventMention.class, segment);
+					for (String name : PROPERTY_NAMES) {
+						this.loggers.get(name).fine("Errors in : " + ViewURIUtil.getURI(jCas).toString());
+						Function<EventMention, String> getProperty = propertyGetters.get(name);
+						statsMap.get(name).add(
+								goldEvents,
+								systemEvents,
+								eventMentionToSpan,
+								getProperty);
+						// Events are paired by position; stop at the shorter list so that a
+						// size mismatch cannot raise an IndexOutOfBoundsException.
+						int pairCount = Math.min(goldEvents.size(), systemEvents.size());
+						for (int i = 0; i < pairCount; ++i) {
+							String goldOutcome = getProperty.apply(goldEvents.get(i));
+							String systemOutcome = getProperty.apply(systemEvents.get(i));
+							// Either value may be null (unset feature), so compare null-safely.
+							boolean sameOutcome = goldOutcome == null
+									? systemOutcome == null
+									: goldOutcome.equals(systemOutcome);
+							if (!sameOutcome) {
+								EventMention event = goldEvents.get(i);
+								int begin = event.getBegin();
+								int end = event.getEnd();
+								// Show up to 50 characters of context on either side of the event.
+								int windowBegin = Math.max(0, begin - 50);
+								int windowEnd = Math.min(text.length(), end + 50);
+								this.loggers.get(name).fine(String.format(
+										"%s was %s but should be %s, in  ...%s[!%s!:%d-%d]%s...",
+										name,
+										systemOutcome,
+										goldOutcome,
+										text.substring(windowBegin, begin).replaceAll("[\r\n]", " "),
+										text.substring(begin, end),
+										begin,
+										end,
+										text.substring(end, windowEnd).replaceAll("[\r\n]", " ")));
+							}
+						}
+					}
+				}
+			}
+		}
+		return statsMap;
+	}
+
+	/**
+	 * Sends the FINE-level messages of every property logger to
+	 * {@code <outputFilePrefix>.<property>.log} in {@code outputDir}, one message per line.
+	 *
+	 * @param outputDir the directory for the log files; created if it does not exist
+	 * @param outputFilePrefix the file name prefix of the log files
+	 * @throws IOException if a log file cannot be opened
+	 */
+	public void logClassificationErrors(File outputDir, String outputFilePrefix) throws IOException {
+		if (!outputDir.exists()) {
+			outputDir.mkdirs();
+		}
+		for (String name : PROPERTY_NAMES) {
+			Logger logger = this.loggers.get(name);
+			logger.setLevel(Level.FINE);
+			File outputFile = new File(outputDir, String.format("%s.%s.log", outputFilePrefix, name));
+			FileHandler handler = new FileHandler(outputFile.getPath());
+			handler.setFormatter(new Formatter() {
+				@Override
+				public String format(LogRecord record) {
+					return record.getMessage() + '\n';
+				}
+			});
+			logger.addHandler(handler);
+		}
+	}
+
+	/**
+	 * Returns a function that reads the named feature of an event mention's
+	 * {@link EventProperties} as a string.
+	 */
+	private static Function<EventMention, String> getPropertyGetter(final String propertyName) {
+		return new Function<EventMention, String>() {
+			@Override
+			public String apply(EventMention eventMention) {
+				EventProperties eventProperties = eventMention.getEvent().getProperties();
+				Feature feature = eventProperties.getType().getFeatureByBaseName(propertyName);
+				return eventProperties.getFeatureValueAsString(feature);
+			}
+		};
+	}
+
+	/** Resets all event property values in the CAS so that they have to be predicted again. */
+	public static class ClearEventProperties extends JCasAnnotator_ImplBase {
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			for (EventProperties eventProperties : JCasUtil.select(jCas, EventProperties.class)) {
+				eventProperties.setAspect(null);
+				eventProperties.setCategory(null);
+				eventProperties.setContextualAspect(null);
+				eventProperties.setContextualModality(null);
+				eventProperties.setDegree(null);
+				eventProperties.setDocTimeRel(null);
+				eventProperties.setPermanence(null);
+				eventProperties.setPolarity(0);
+			}
+		}
+
+	}
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message