ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject svn commit: r1753407 [1/3] - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ ae/feature/ eval/ keras/
Date Tue, 19 Jul 2016 17:14:36 GMT
Author: clin
Date: Tue Jul 19 17:14:35 2016
New Revision: 1753407

URL: http://svn.apache.org/viewvc?rev=1753407&view=rev
Log:
Enable Convolutional Neural Network for event-time classification. May need further debug.

NeuralEventTimeRelationsEvaluation: will evaluate pre-trained embedding features.

NeuralEventTimeRelationsCNNEvaluation: will write a sequence of tokens to feed into the CNN.

Both above evals will evaluate the test instances within the cleartk framework. 

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeCNNAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/NeuralEventTimeSelfRelationAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticETEmbeddingFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticEmbeddingFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/NeuralEventTimeRelationsCNNEvaluation.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/NeuralEventTimeRelationsEvaluation.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/KerasStringFeatureDataWriter.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/KerasStringOutcomeClassifier.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/KerasStringOutcomeClassifierBuilder.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/KerasStringOutcomeDataWriter.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/ScriptStringFeatureDataWriter.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/ScriptStringOutcomeClassifier.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/ScriptStringOutcomeClassifierBuilder.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/keras/ScriptStringOutcomeDataWriter.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeCNNAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeCNNAnnotator.java?rev=1753407&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeCNNAnnotator.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeCNNAnnotator.java
Tue Jul 19 17:14:35 2016
@@ -0,0 +1,240 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.TemporalRelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.temporal.nn.eventTimeRelationPrinter;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.util.ViewUriUtil;
+
+import com.google.common.collect.Lists;
+
+public class EventTimeCNNAnnotator extends CleartkAnnotator<String> {
+
+	public static final String NO_RELATION_CATEGORY = "none";//-NONE-
+
+	/**
+	 * Creates the annotator. Intentionally empty: this class declares no instance
+	 * state of its own.
+	 */
+	public EventTimeCNNAnnotator() {
+		super();
+	}
+
+	@Override
+	public void process(JCas jCas) throws AnalysisEngineProcessException {
+		//get all gold relation lookup
+		Map<List<Annotation>, BinaryTextRelation> relationLookup;
+		relationLookup = new HashMap<>();
+		if (this.isTraining()) {
+			relationLookup = new HashMap<>();
+			for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+				Annotation arg1 = relation.getArg1().getArgument();
+				Annotation arg2 = relation.getArg2().getArgument();
+				// The key is a list of args so we can do bi-directional lookup
+				List<Annotation> key = Arrays.asList(arg1, arg2);
+				if(relationLookup.containsKey(key)){
+					String reln = relationLookup.get(key).getCategory();
+					System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
+					System.err.println("Error! This attempted relation " + relation.getCategory() + " already
has a relation " + reln + " at this span: " + arg1.getCoveredText() + " -- " + arg2.getCoveredText());
+				}else{
+					relationLookup.put(key, relation);
+				}
+			}
+		}
+
+		// go over sentences, extracting event-time relation instances
+		for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+			// collect all relevant relation arguments from the sentence
+			List<IdentifiedAnnotationPair> candidatePairs =
+					getCandidateRelationArgumentPairs(jCas, sentence);
+
+			// walk through the pairs of annotations
+			for (IdentifiedAnnotationPair pair : candidatePairs) {
+				IdentifiedAnnotation arg1 = pair.getArg1();
+				IdentifiedAnnotation arg2 = pair.getArg2();
+
+				String context;
+				if(arg2.getBegin() < arg1.getBegin()) {
+					// ... time ... event ... scenario
+					context = eventTimeRelationPrinter.getTokensBetween(jCas, sentence, arg2, "t", arg1,
"e", 2);  
+				} else {
+					// ... event ... time ... scenario
+					context = eventTimeRelationPrinter.getTokensBetween(jCas, sentence, arg1, "e", arg2,
"t", 2);
+				}
+
+				//derive features based on context:
+				List<Feature> feats = new ArrayList<>();
+				String[] tokens = context.split(" ");
+				for (String token: tokens){
+					feats.add(new Feature(token.toLowerCase()));
+				}
+
+				// during training, feed the features to the data writer
+				if (this.isTraining()) {
+					String category = getRelationCategory(relationLookup, arg1, arg2);
+					if (category == null) {
+						category = NO_RELATION_CATEGORY.toLowerCase();
+					}else{
+						category = category.toLowerCase();
+					}
+					this.dataWriter.write(new Instance<>(category, feats));
+				}
+
+				// during classification feed the features to the classifier and create annotations
+				else {
+					String predictedCategory = this.classifier.classify(feats);
+
+					// add a relation annotation if a true relation was predicted
+					if (predictedCategory != null && !predictedCategory.equals(NO_RELATION_CATEGORY.toLowerCase()))
{
+
+						// if we predict an inverted relation, reverse the order of the arguments
+						if (predictedCategory.endsWith("-1")) {
+							predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+							if(arg1 instanceof TimeMention){
+								IdentifiedAnnotation temp = arg1;
+								arg1 = arg2;
+								arg2 = temp;
+							}
+						}else{
+							if(arg1 instanceof EventMention){
+								IdentifiedAnnotation temp = arg1;
+								arg1 = arg2;
+								arg2 = temp;
+							}
+						}
+
+						createRelation(jCas, arg1, arg2, predictedCategory.toUpperCase(), 0.0);
+					}
+				}
+			}
+
+		}
+	}
+
+	/**
+	 * original way of getting label
+	 * @param relationLookup
+	 * @param arg1
+	 * @param arg2
+	 * @return
+	 */
+	//	protected String getRelationCategory(
+	//			Map<List<Annotation>, BinaryTextRelation> relationLookup,
+	//			IdentifiedAnnotation arg1,
+	//			IdentifiedAnnotation arg2) {
+	//		BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+	//		String category = null;
+	//		if (relation != null) {
+	//			category = relation.getCategory();
+	//		} else {
+	//			relation = relationLookup.get(Arrays.asList(arg2, arg1));
+	//			if (relation != null) {
+	//				if(relation.getCategory().equals("OVERLAP")){
+	//					category = relation.getCategory();
+	//				}else{
+	//					category = relation.getCategory() + "-1";
+	//				}
+	//			}
+	//		}
+	//
+	//		return category;
+	//	}
+
+	/**
+	 * Dima's way of getting labels: looks up the gold relation for a candidate
+	 * pair in either argument order and appends "-1" to its category whenever the
+	 * first argument of the matching lookup key is an EventMention.
+	 *
+	 * @param relationLookup gold relations keyed by their (arg1, arg2) list
+	 * @param arg1 first annotation of the candidate pair
+	 * @param arg2 second annotation of the candidate pair
+	 * @return the gold category (possibly suffixed with "-1"), or null when no
+	 *         gold relation links the two annotations in either order
+	 */
+	protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
+			IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2){
+		// try the pair as given first, then with the arguments reversed
+		IdentifiedAnnotation goldFirstArg = arg1;
+		BinaryTextRelation gold = relationLookup.get(Arrays.asList(arg1, arg2));
+		if (gold == null) {
+			goldFirstArg = arg2;
+			gold = relationLookup.get(Arrays.asList(arg2, arg1));
+		}
+		if (gold == null) {
+			return null;
+		}
+
+		String label = gold.getCategory();
+		return (goldFirstArg instanceof EventMention) ? label + "-1" : label;
+	}
+
+
+	/**
+	 * Adds a TemporalTextRelation between the two annotations to the CAS indexes,
+	 * together with the two RelationArgument wrappers (roles "Arg1" and "Arg2").
+	 *
+	 * @param jCas the CAS that receives the new feature structures
+	 * @param arg1 annotation stored as the first argument
+	 * @param arg2 annotation stored as the second argument
+	 * @param predictedCategory category set on the relation
+	 * @param confidence confidence set on the relation
+	 */
+	protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
+		// first argument wrapper
+		RelationArgument firstArgument = new RelationArgument(jCas);
+		firstArgument.setArgument(arg1);
+		firstArgument.setRole("Arg1");
+		firstArgument.addToIndexes();
+
+		// second argument wrapper
+		RelationArgument secondArgument = new RelationArgument(jCas);
+		secondArgument.setArgument(arg2);
+		secondArgument.setRole("Arg2");
+		secondArgument.addToIndexes();
+
+		// the relation itself
+		TemporalTextRelation temporalRelation = new TemporalTextRelation(jCas);
+		temporalRelation.setArg1(firstArgument);
+		temporalRelation.setArg2(secondArgument);
+		temporalRelation.setCategory(predictedCategory);
+		temporalRelation.setConfidence(confidence);
+		temporalRelation.addToIndexes();
+	}
+
+	/**
+	 * Pairs every plain EventMention in the sentence with every TimeMention in the
+	 * same sentence. Events whose runtime class is a subclass of EventMention
+	 * (like Procedure and Disease/Disorder) are ignored. Training and
+	 * classification use the same candidates.
+	 *
+	 * @param jCas the CAS to select annotations from
+	 * @param sentence the sentence whose covered events and times are paired
+	 * @return (event, time) pairs, grouped by event in selection order
+	 */
+	private List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(JCas jCas, Sentence sentence) {
+		List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+
+		// the times of the sentence do not depend on the event, so select them once
+		List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+
+		for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
+			// exact class match only: skip EventMention subclasses
+			if (!event.getClass().equals(EventMention.class)) {
+				continue;
+			}
+			for (TimeMention time : times) {
+				pairs.add(new IdentifiedAnnotationPair(event, time));
+			}
+		}
+
+		return pairs;
+	}
+
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/NeuralEventTimeSelfRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/NeuralEventTimeSelfRelationAnnotator.java?rev=1753407&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/NeuralEventTimeSelfRelationAnnotator.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/NeuralEventTimeSelfRelationAnnotator.java
Tue Jul 19 17:14:35 2016
@@ -0,0 +1,283 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.CheckSpecialWordRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.ConjunctionRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.ContinuousTextExtractor;
+import org.apache.ctakes.temporal.ae.feature.DependencyFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.DependencyPathFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.RelationEmbeddingFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.RelationSyntacticETEmbeddingFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.EmptyFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.EventArgumentPropertyExtractor;
+import org.apache.ctakes.temporal.ae.feature.MultiTokenFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventIndexOfSameSentenceRelationFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventPositionRelationFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.EventTimeRelationFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.NearestFlagFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.NumberOfEventTimeBetweenCandidatesExtractor;
+import org.apache.ctakes.temporal.ae.feature.OverlappedHeadFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.SRLRelationFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.SectionHeaderRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.TemporalAttributeFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.TemporalPETFlatExtractor;
+//import org.apache.ctakes.temporal.ae.feature.TimeWordTypeRelationExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXPropertyRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXRelationFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.UnexpandedTokenFeaturesExtractor;
+//import org.apache.ctakes.temporal.ae.feature.TemporalAttributeFeatureExtractor;
+//import org.apache.ctakes.temporal.ae.feature.treekernel.EventTimeFlatTreeFeatureExtractor;
+//import org.apache.ctakes.temporal.ae.feature.treekernel.EventVerbRelationTreeExtractor;
+//import org.apache.ctakes.temporal.ae.feature.treekernel.TemporalPETExtractor;
+//import org.apache.ctakes.temporal.ae.feature.treekernel.TemporalPathExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.DataWriter;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+//import org.cleartk.ml.feature.transform.InstanceDataWriter; //used for normalization
+import org.cleartk.ml.jar.DefaultDataWriterFactory;
+import org.cleartk.ml.jar.DirectoryDataWriterFactory;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+public class NeuralEventTimeSelfRelationAnnotator extends TemporalRelationExtractorAnnotator
{
+
+	/**
+	 * Builds the description of this annotator in training mode.
+	 *
+	 * @param dataWriterClass data writer class passed as PARAM_DATA_WRITER_CLASS_NAME
+	 * @param outputDirectory directory passed as PARAM_OUTPUT_DIRECTORY
+	 * @param probabilityOfKeepingANegativeExample value passed (as a float) as
+	 *        PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE
+	 * @return the analysis engine description
+	 * @throws ResourceInitializationException if the description cannot be created
+	 */
+	public static AnalysisEngineDescription createDataWriterDescription(
+			Class<? extends DataWriter<String>> dataWriterClass,
+			File outputDirectory,
+			double probabilityOfKeepingANegativeExample) throws ResourceInitializationException {
+		// not sure why this has to be cast; something funny going on in uimaFIT maybe?
+		final float keepNegativeProbability = (float) probabilityOfKeepingANegativeExample;
+
+		return AnalysisEngineFactory.createEngineDescription(
+				NeuralEventTimeSelfRelationAnnotator.class,
+				CleartkAnnotator.PARAM_IS_TRAINING, true,
+				DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
+				DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory,
+				RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE, keepNegativeProbability);
+	}
+
+	/**
+	 * Builds the description of this annotator in classification mode.
+	 *
+	 * @param modelPath value passed as PARAM_CLASSIFIER_JAR_PATH
+	 * @return the analysis engine description
+	 * @throws ResourceInitializationException if the description cannot be created
+	 */
+	public static AnalysisEngineDescription createEngineDescription(String modelPath)
+			throws ResourceInitializationException {
+		return AnalysisEngineFactory.createEngineDescription(
+				NeuralEventTimeSelfRelationAnnotator.class,
+				CleartkAnnotator.PARAM_IS_TRAINING, false,
+				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelPath);
+	}
+	/**
+	 * Builds the description of this annotator in classification mode, reading
+	 * the classifier from "model.jar" inside the given directory.
+	 *
+	 * @param modelDirectory directory that contains model.jar
+	 * @return the analysis engine description
+	 * @throws ResourceInitializationException if the description cannot be created
+	 * @deprecated use String path instead of File.
+	 * ClearTK will automatically Resolve the String to an InputStream.
+	 * This will allow resources to be read within from a jar as well as File.
+	 */
+	@Deprecated
+	public static AnalysisEngineDescription createEngineDescription(File modelDirectory)
+			throws ResourceInitializationException {
+		final File modelJar = new File(modelDirectory, "model.jar");
+		return AnalysisEngineFactory.createEngineDescription(
+				NeuralEventTimeSelfRelationAnnotator.class,
+				CleartkAnnotator.PARAM_IS_TRAINING, false,
+				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelJar);
+	}
+
+	private RelationSyntacticETEmbeddingFeatureExtractor syntaticEmbeddingExtractor;
+	private RelationEmbeddingFeatureExtractor wordEmbeddingExtractor;
+
+	/**
+	 * Creates the two embedding-based extractors used by this annotator: a
+	 * syntactic-path embedding extractor and a word embedding extractor, each
+	 * loaded from a fixed resource path.
+	 *
+	 * @return the two extractors, syntactic first
+	 * @throws IllegalStateException if either embedding resource cannot be loaded;
+	 *         previously the failure was only printed and a null extractor was
+	 *         placed in the returned list
+	 */
+	@Override
+	protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+		final String syntacticVectorFile = "org/apache/ctakes/temporal/glove_200d.txt";
+		final String wordVectorFile = "org/apache/ctakes/temporal/mimic_vectors.txt";
+		try {
+			this.syntaticEmbeddingExtractor = new RelationSyntacticETEmbeddingFeatureExtractor(syntacticVectorFile);
+		} catch (CleartkExtractorException e) {
+			// fail here, with the cause attached, instead of handing out a null extractor
+			throw new IllegalStateException("cannot find file: "+ syntacticVectorFile, e);
+		}
+		try {
+			this.wordEmbeddingExtractor = new RelationEmbeddingFeatureExtractor(wordVectorFile);
+		} catch (CleartkExtractorException e) {
+			throw new IllegalStateException("cannot find file: "+ wordVectorFile, e);
+		}
+		return Lists.newArrayList(
+				syntaticEmbeddingExtractor
+				, wordEmbeddingExtractor
+				);
+	}
+
+	/**
+	 * @return Sentence.class, the covering annotation type for this annotator
+	 */
+	@Override
+	protected Class<? extends Annotation> getCoveringClass() {
+		return Sentence.class;
+	}
+
+	/**
+	 * Pairs the plain EventMentions of a sentence with its TimeMentions. Events
+	 * whose runtime class is a subclass of EventMention (like Procedure and
+	 * Disease/Disorder) are ignored. In training mode, every EventMention that
+	 * covers the event is additionally paired with each time, before the event's
+	 * own pair.
+	 *
+	 * @param jCas the CAS to select annotations from
+	 * @param sentence the covering annotation whose events and times are paired
+	 * @return the candidate pairs, grouped by event and then by time
+	 */
+	@Override
+	public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+			JCas jCas,
+			Annotation sentence) {
+		final boolean training = this.isTraining();
+
+		// The covering index spans the whole CAS and is only read while training,
+		// so it is not built at all during classification.
+		Map<EventMention, Collection<EventMention>> coveringMap = null;
+		if (training) {
+			coveringMap = JCasUtil.indexCovering(jCas, EventMention.class, EventMention.class);
+		}
+
+		// the times of the sentence do not depend on the event, so select them once
+		List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+
+		List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
+		for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
+			// exact class match only: skip EventMention subclasses
+			if (!event.getClass().equals(EventMention.class)) {
+				continue;
+			}
+
+			for (TimeMention time : times) {
+				if (training) {
+					// also train on the events whose span covers this event
+					for (EventMention covEvent : coveringMap.get(event)) {
+						pairs.add(new IdentifiedAnnotationPair(covEvent, time));
+					}
+				}
+				pairs.add(new IdentifiedAnnotationPair(event, time));
+			}
+		}
+
+		return pairs;
+	}
+
+	/**
+	 * Adds a TemporalTextRelation between the two annotations to the CAS indexes,
+	 * together with the two RelationArgument wrappers (roles "Arg1" and "Arg2").
+	 *
+	 * @param jCas the CAS that receives the new feature structures
+	 * @param arg1 annotation stored as the first argument
+	 * @param arg2 annotation stored as the second argument
+	 * @param predictedCategory category set on the relation
+	 * @param confidence confidence set on the relation
+	 */
+	@Override
+	protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
+		// first argument wrapper
+		RelationArgument firstArgument = new RelationArgument(jCas);
+		firstArgument.setArgument(arg1);
+		firstArgument.setRole("Arg1");
+		firstArgument.addToIndexes();
+
+		// second argument wrapper
+		RelationArgument secondArgument = new RelationArgument(jCas);
+		secondArgument.setArgument(arg2);
+		secondArgument.setRole("Arg2");
+		secondArgument.addToIndexes();
+
+		// the relation itself
+		TemporalTextRelation temporalRelation = new TemporalTextRelation(jCas);
+		temporalRelation.setArg1(firstArgument);
+		temporalRelation.setArg2(secondArgument);
+		temporalRelation.setCategory(predictedCategory);
+		temporalRelation.setConfidence(confidence);
+		temporalRelation.addToIndexes();
+	}
+
+
+	/**
+	 * Determines the training label of a candidate pair. A gold relation found in
+	 * the given argument order yields its category unchanged. One found in the
+	 * reversed order yields the category unchanged for "OVERLAP" and the category
+	 * with a "-1" suffix otherwise. When no label results, NO_RELATION_CATEGORY is
+	 * returned if the random draw is at most probabilityOfKeepingANegativeExample.
+	 *
+	 * @param relationLookup gold relations keyed by their (arg1, arg2) list
+	 * @param arg1 first annotation of the candidate pair
+	 * @param arg2 second annotation of the candidate pair
+	 * @return the label, or null when the pair has no label and the negative
+	 *         example is not kept
+	 */
+	@Override
+	protected String getRelationCategory(
+			Map<List<Annotation>, BinaryTextRelation> relationLookup,
+			IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2) {
+		String label = null;
+
+		BinaryTextRelation forward = relationLookup.get(Arrays.asList(arg1, arg2));
+		if (forward != null) {
+			label = forward.getCategory();
+		} else {
+			BinaryTextRelation backward = relationLookup.get(Arrays.asList(arg2, arg1));
+			if (backward != null) {
+				String goldCategory = backward.getCategory();
+				// OVERLAP keeps its name in both directions; anything else is marked as inverted
+				label = goldCategory.equals("OVERLAP") ? goldCategory : goldCategory + "-1";
+			}
+		}
+
+		// down-sample negative examples
+		if (label == null && coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) {
+			label = NO_RELATION_CATEGORY;
+		}
+
+		return label;
+	}
+
+	/**used for normalization
+	public static AnalysisEngineDescription createDataWriterDescription(Class<InstanceDataWriter>
dataWriterClass,
+			File outputDirectory, float probabilityOfKeepingANegativeExample) throws ResourceInitializationException
{
+		return AnalysisEngineFactory.createEngineDescription(
+				EventTimeSelfRelationAnnotator.class,
+				CleartkAnnotator.PARAM_IS_TRAINING,
+				true,
+				DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+				dataWriterClass,
+				DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+				outputDirectory,
+				RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+				probabilityOfKeepingANegativeExample);
+	}*/
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticETEmbeddingFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticETEmbeddingFeatureExtractor.java?rev=1753407&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticETEmbeddingFeatureExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticETEmbeddingFeatureExtractor.java
Tue Jul 19 17:14:35 2016
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.utils.distsem.WordEmbeddings;
+import org.apache.ctakes.utils.distsem.WordVector;
+import org.apache.ctakes.utils.distsem.WordVectorReader;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+
+/**
+ * Word embedding based features.
+ */
+public class RelationSyntacticETEmbeddingFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,
IdentifiedAnnotation> {
+
+	private int numberOfDimensions;
+	private WordEmbeddings paths = null;
+
+	/**
+	 * Loads the embeddings from the given resource and records their dimensionality.
+	 *
+	 * @param vecFile location of the vector file, resolved through FileLocator
+	 * @throws CleartkExtractorException wrapping any IOException raised while
+	 *         locating or reading the file
+	 */
+	public RelationSyntacticETEmbeddingFeatureExtractor(String vecFile) throws CleartkExtractorException {
+		// Close the stream once the embeddings are built.
+		// NOTE(review): assumes getEmbeddings reads the stream eagerly - confirm.
+		try (java.io.InputStream vectorStream = FileLocator.getAsStream(vecFile)) {
+			paths = WordVectorReader.getEmbeddings(vectorStream);
+		} catch (IOException e) {
+			// the cause is chained, so it is not printed separately here
+			throw new CleartkExtractorException(e);
+		}
+		numberOfDimensions = paths.getDimensionality();
+	}
+
+	@Override
+	public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation
arg2) throws AnalysisEngineProcessException {
+
+		List<Feature> features = new ArrayList<>();
+
+		//get the PET tree between arg1 and arg2:
+		// first get the root and print it out...
+		TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(jCas, AnnotationTreeUtils.getAnnotationTree(jCas,
arg1));
+
+		if(root == null){
+			return features; 
+		}
+
+		TreebankNode t1 = AnnotationTreeUtils.annotationNode(jCas, arg1);
+		TreebankNode t2 = AnnotationTreeUtils.annotationNode(jCas, arg2);
+
+		//		addOtherTimes(jcas,root, arg1, arg2);
+
+		// words between argument features
+		List<String> pathsBetweenArgs = new ArrayList<>();
+
+		//		SimpleTree tree = null;
+		if(t1.getBegin() <= t2.getBegin() && t1.getEnd() >= t2.getEnd()){
+			// t1 encloses t2
+			//			tree = TreeExtractor.getSimpleClone(t1);
+			pathsBetweenArgs.add(getPathBetweenNodes(t2, t1,""));
+		}else if(t2.getBegin() <= t1.getBegin() && t2.getEnd() >= t1.getEnd()){
+			// t2 encloses t1
+			//			tree = TreeExtractor.getSimpleClone(t2);
+			pathsBetweenArgs.add(getPathBetweenNodes(t1, t2,""));
+		}else{
+			//			tree = TreeExtractor.extractPathEnclosedTree(t1, t2, jCas);
+			TreebankNode lca = TreeExtractor.getLCA(t1, t2);
+			pathsBetweenArgs.add(getPathBetweenNodes(t1, lca,""));
+			pathsBetweenArgs.add(getPathBetweenNodes(t2, lca,""));
+		}
+
+		List<Double> sum = new ArrayList<>(Collections.nCopies(numberOfDimensions,
0.0));
+		for(String path : pathsBetweenArgs) {
+			WordVector wordVector;
+			if(paths.containsKey(path)) {
+				wordVector = paths.getVector(path);
+			} else {
+				while(!paths.containsKey(path)){
+					String trimmedPath = removeTail(path);
+					if(trimmedPath==null){
+						break;
+					}
+					path = trimmedPath;
+				}
+				if(paths.containsKey(path)){
+					wordVector = paths.getVector(path);
+				}else{
+					wordVector = paths.getVector("<unk>");
+				}
+			}
+			sum = addVectors(sum, wordVector);      
+		}
+
+		for(int dim = 0; dim < numberOfDimensions; dim++) {
+			String featureName = String.format("syntactic_average_dim_%d", dim);
+			features.add(new Feature(featureName, sum.get(dim) / pathsBetweenArgs.size()));
+		}
+
+		return features;
+	}
+
+	private static String removeTail(String path) {
+		int dashIdx = path.lastIndexOf("-");
+		if(dashIdx>0){
+			path = path.substring(0, dashIdx);
+			return path;
+		}
+		return null;
+	}
+
+	/**
+	 * Builds the dash-separated list of node types met while climbing from
+	 * {@code child} towards {@code ancestor}, topmost node first. Climbing stops
+	 * after the ancestor's type has been prepended, or at a node without a parent.
+	 *
+	 * @param child node to start from
+	 * @param ancestor node at which to stop (compared by identity)
+	 * @param path already collected path below {@code child}; "" for none
+	 * @return the collected path
+	 */
+	private String getPathBetweenNodes(TreebankNode child, TreebankNode ancestor, String path) {
+		TreebankNode current = child;
+		String collected = path;
+		while (true) {
+			TreebankNode parent = current.getParent();
+			// prepend the current node's type
+			collected = "".equals(collected)
+					? current.getNodeType()
+					: current.getNodeType()+"-"+collected;
+			if (parent == null) {
+				return collected;
+			}
+			if (parent == ancestor) {
+				return parent.getNodeType() + "-" + collected;
+			}
+			current = parent;
+		}
+	}
+
+
+	/**
+	 * Computes a smoothed cosine similarity between two word vectors over the
+	 * first numberOfDimensions dimensions. Both squared norms start at 0.01
+	 * rather than 0, so the denominator is never zero.
+	 *
+	 * @param vector1 first vector
+	 * @param vector2 second vector
+	 * @return dot product divided by the product of the (smoothed) norms
+	 */
+	public double computeCosineSimilarity(WordVector vector1, WordVector vector2) {
+
+		double dot = 0.0;
+		double squaredNorm1 = 0.01;
+		double squaredNorm2 = 0.01;
+
+		for (int i = 0; i < numberOfDimensions; i++) {
+			dot += vector1.getValue(i) * vector2.getValue(i);
+			squaredNorm1 += Math.pow(vector1.getValue(i), 2);
+			squaredNorm2 += Math.pow(vector2.getValue(i), 2);
+		}
+
+		return dot / (Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2));
+	}
+
+	/**
+	 * Computes a smoothed cosine similarity between two lists of doubles over the
+	 * first numberOfDimensions entries. Both squared norms start at 0.01 rather
+	 * than 0, so the denominator is never zero.
+	 *
+	 * @param vector1 first vector
+	 * @param vector2 second vector
+	 * @return dot product divided by the product of the (smoothed) norms
+	 */
+	public double computeCosineSimilarity(List<Double> vector1, List<Double> vector2) {
+
+		double dot = 0.0;
+		double squaredNorm1 = 0.01;
+		double squaredNorm2 = 0.01;
+
+		for (int i = 0; i < numberOfDimensions; i++) {
+			dot += vector1.get(i) * vector2.get(i);
+			squaredNorm1 += Math.pow(vector1.get(i), 2);
+			squaredNorm2 += Math.pow(vector2.get(i), 2);
+		}
+
+		return dot / (Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2));
+	}
+
+	/**
+	 * Add two vectors. Return the sum vector.
+	 */
+	public List<Double> addVectors(List<Double> vector1, WordVector vector2) {
+
+		List<Double> sum = new ArrayList<>();
+		for(int dim = 0; dim < numberOfDimensions; dim++) {
+			sum.add(vector1.get(dim) + vector2.getValue(dim));
+		}
+
+		return sum;
+	}
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticEmbeddingFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticEmbeddingFeatureExtractor.java?rev=1753407&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticEmbeddingFeatureExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/RelationSyntacticEmbeddingFeatureExtractor.java
Tue Jul 19 17:14:35 2016
@@ -0,0 +1,262 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.utils.distsem.WordEmbeddings;
+import org.apache.ctakes.utils.distsem.WordVector;
+import org.apache.ctakes.utils.distsem.WordVectorReader;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+
+/**
+ * Word embedding based features.
+ */
+public class RelationSyntacticEmbeddingFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,
IdentifiedAnnotation> {
+
+	private int numberOfDimensions;
+	private WordEmbeddings paths = null;
+
+	public RelationSyntacticEmbeddingFeatureExtractor(String vecFile) throws
+	CleartkExtractorException {
+		try {
+			paths =
+					WordVectorReader.getEmbeddings(FileLocator.getAsStream(vecFile));
+		} catch (IOException e) {
+			e.printStackTrace();
+			throw new CleartkExtractorException(e);
+		}
+		numberOfDimensions = paths.getDimensionality();
+	}
+
+	@Override
+	public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation
arg2) throws AnalysisEngineProcessException {
+
+		List<Feature> features = new ArrayList<>();
+
+		//get the PET tree between arg1 and arg2:
+		// first get the root and print it out...
+		TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(jCas, AnnotationTreeUtils.getAnnotationTree(jCas,
arg1));
+
+		if(root == null){
+			return features; 
+		}
+
+		TreebankNode t1 = AnnotationTreeUtils.annotationNode(jCas, arg1);
+		TreebankNode t2 = AnnotationTreeUtils.annotationNode(jCas, arg2);
+
+		//		addOtherTimes(jcas,root, arg1, arg2);
+
+		// words between argument features
+		List<String> pathsBetweenArgs = new ArrayList<>();
+
+		
+		//		SimpleTree tree = null;
+		if(t1.getBegin() <= t2.getBegin() && t1.getEnd() >= t2.getEnd()){
+			// t1 encloses t2
+			//			tree = TreeExtractor.getSimpleClone(t1);
+			pathsBetweenArgs.add(getPathBetweenNodes(t2, t1,""));
+		}else if(t2.getBegin() <= t1.getBegin() && t2.getEnd() >= t1.getEnd()){
+			// t2 encloses t1
+			//			tree = TreeExtractor.getSimpleClone(t2);
+			pathsBetweenArgs.add(getPathBetweenNodes(t1, t2,""));
+		}else{
+			//			tree = TreeExtractor.extractPathEnclosedTree(t1, t2, jCas);
+			TreebankNode lca = TreeExtractor.getLCA(t1, t2);
+			pathsBetweenArgs.add(getPathBetweenNodes(t1, lca,""));
+			pathsBetweenArgs.add(getPathBetweenNodes(t2, lca,""));
+		}
+
+		//		tree.setGeneralizeLeaf(true);
+
+		SimpleTree tree = TreeExtractor.getSimpleClone(t1);
+		pathsBetweenArgs.addAll(traverseTreeForDPath(tree));
+		tree = TreeExtractor.getSimpleClone(t2);
+		pathsBetweenArgs.addAll(traverseTreeForDPath(tree));
+
+		//		pathsBetweenArgs.addAll(traverseTreeForDPath(tree));
+		//
+		//		if(pathsBetweenArgs.size() < 1) {
+		//			return features;  
+		//		}
+
+		List<Double> sum = new ArrayList<>(Collections.nCopies(numberOfDimensions,
0.0));
+		for(String path : pathsBetweenArgs) {
+			WordVector wordVector;
+			if(paths.containsKey(path)) {
+				wordVector = paths.getVector(path);
+			} else {
+				while(!paths.containsKey(path)){
+					String trimmedPath = removeTail(path);
+					if(trimmedPath==null){
+						break;
+					}
+					path = trimmedPath;
+				}
+				if(paths.containsKey(path)){
+					wordVector = paths.getVector(path);
+				}else{
+					wordVector = paths.getVector("S");
+				}
+			}
+			sum = addVectors(sum, wordVector);      
+		}
+
+		for(int dim = 0; dim < numberOfDimensions; dim++) {
+			String featureName = String.format("syntactic_average_dim_%d", dim);
+			features.add(new Feature(featureName, sum.get(dim) / pathsBetweenArgs.size()));
+		}
+
+		return features;
+	}
+
+	private static String removeTail(String path) {
+		int dashIdx = path.lastIndexOf("-");
+		if(dashIdx>0){
+			path = path.substring(0, dashIdx);
+			return path;
+		}
+		return null;
+	}
+
+	private String getPathBetweenNodes(TreebankNode child, TreebankNode ancestor, String path)
{
+		TreebankNode father = child.getParent();
+		if("".equals(path)){
+			path = child.getNodeType();
+		}else{
+			path = child.getNodeType()+"-"+path;
+		}
+		if(father == null){
+			return path;
+		}else if(father == ancestor){
+			path = father.getNodeType() + "-" + path;
+			return path;
+		}
+		return getPathBetweenNodes(father, ancestor, path);
+	}
+
+	private String getPathToRoot(TreebankNode child, String path) {
+		TreebankNode father = child.getParent();
+		if("".equals(path)){
+			path = child.getNodeType();
+		}else{
+			path = child.getNodeType()+"-"+path;
+		}
+		if(father == null){
+			return path;
+		}
+		return getPathToRoot(father, path);
+	}
+	
+	private List<String> traverseTreeForDPath(SimpleTree tree) {
+		List<String> features = new ArrayList<>();
+		String rootStr = tree.cat;
+		features.add(rootStr);//add length 0 DPK
+		if(tree.children.size() == 1 && tree.children.get(0).children.size() == 0){//if
tree is a leaf
+			features.add(rootStr + "-" + tree.children.get(0).cat);
+		}else{//if tree is not a leaf
+			for(SimpleTree subtree: tree.children){
+				features.addAll(traverseTreeForDPath(subtree));
+				for(String str: getSubTreeStrings(subtree)){
+					features.add(rootStr+"-"+str);
+				}
+			}
+		}
+		return features;
+	}
+
+	private List<String> getSubTreeStrings(SimpleTree subtree) {
+		List<String> subTreeStrings = new ArrayList<>();
+		subTreeStrings.add(subtree.cat);
+		if(subtree.children.size() == 1 && subtree.children.get(0).children.size() == 0){//if
subtree is a leaf
+			subTreeStrings.add(subtree.cat + "-" + subtree.children.get(0).cat);
+		}else{ //if subtree is not a leaf
+			for(SimpleTree subsubTree: subtree.children){
+				for(String str: getSubTreeStrings(subsubTree)){
+					subTreeStrings.add(subtree.cat+"-"+str);
+				}
+			}
+		}
+		return subTreeStrings;
+	}
+
+	/**
+	 * Compute cosine similarity between two vectors.
+	 */
+	public double computeCosineSimilarity(WordVector vector1, WordVector vector2) {
+
+		double dotProduct = 0.0;
+		double norm1 = 0.01;
+		double norm2 = 0.01;
+
+		for (int dim = 0; dim < numberOfDimensions; dim++) {
+			dotProduct = dotProduct + vector1.getValue(dim) * vector2.getValue(dim);
+			norm1 = norm1 + Math.pow(vector1.getValue(dim), 2);
+			norm2 = norm2 + Math.pow(vector2.getValue(dim), 2);
+		}
+
+		return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
+	}
+
+	public double computeCosineSimilarity(List<Double> vector1, List<Double> vector2)
{
+
+		double dotProduct = 0.0;
+		double norm1 = 0.01;
+		double norm2 = 0.01;
+
+		for (int dim = 0; dim < numberOfDimensions; dim++) {
+			dotProduct = dotProduct + vector1.get(dim) * vector2.get(dim);
+			norm1 = norm1 + Math.pow(vector1.get(dim), 2);
+			norm2 = norm2 + Math.pow(vector2.get(dim), 2);
+		}
+
+		return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
+	}
+
+	/**
+	 * Add two vectors. Return the sum vector.
+	 */
+	public List<Double> addVectors(List<Double> vector1, WordVector vector2) {
+
+		List<Double> sum = new ArrayList<>();
+		for(int dim = 0; dim < numberOfDimensions; dim++) {
+			sum.add(vector1.get(dim) + vector2.getValue(dim));
+		}
+
+		return sum;
+	}
+}




Mime
View raw message