ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject svn commit: r1466170 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ ae/feature/ eval/
Date Tue, 09 Apr 2013 18:23:28 GMT
Author: clin
Date: Tue Apr  9 18:23:27 2013
New Revision: 1466170

URL: http://svn.apache.org/r1466170
Log:
Add several feature extractors to help DocTimeRel classification.
For each event:
1. get the closest verb and its POS tag
2. get the closest time expression
3. get the ID of the covering segment
4. get the POS pattern of nearby verbs

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
  (with props)
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
  (with props)
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
  (with props)
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java?rev=1466170&r1=1466169&r2=1466170&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
Tue Apr  9 18:23:27 2013
@@ -21,7 +21,10 @@ package org.apache.ctakes.temporal.ae;
 import java.io.File;
 import java.util.List;
 
+import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor;
 import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseXExtractor;
+import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor;
+import org.apache.ctakes.temporal.ae.feature.TimeXExtractor;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.uima.UimaContext;
@@ -73,6 +76,9 @@ public class DocTimeRelAnnotator extends
 
   private CleartkExtractor contextExtractor;
   private NearbyVerbTenseXExtractor verbTensePatternExtractor;
+  private SectionHeaderExtractor sectionIDExtractor;
+  private ClosestVerbExtractor closestVerbExtractor;
+  private TimeXExtractor timeXExtractor;
 
   @Override
   public void initialize(UimaContext context) throws ResourceInitializationException {
@@ -87,6 +93,9 @@ public class DocTimeRelAnnotator extends
         new Covered(),
         new Following(3));
     this.verbTensePatternExtractor = new NearbyVerbTenseXExtractor();
+    this.sectionIDExtractor = new SectionHeaderExtractor();
+    this.closestVerbExtractor = new ClosestVerbExtractor();
+    this.timeXExtractor = new TimeXExtractor();
   }
 
   @Override
@@ -95,6 +104,9 @@ public class DocTimeRelAnnotator extends
       if (eventMention.getEvent() != null) {
         List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
         features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add
nearby verb POS pattern feature
+        features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section
heading
+        features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest
verb
+        features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest
time expression types
         if (this.isTraining()) {
           String outcome = eventMention.getEvent().getProperties().getDocTimeRel();
           this.dataWriter.write(new Instance<String>(outcome, features));

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java?rev=1466170&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
Tue Apr  9 18:23:27 2013
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+//import java.util.logging.Logger;
+
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+public class ClosestVerbExtractor implements SimpleFeatureExtractor {
+
+  private String name;
+
+//  private Logger logger = Logger.getLogger(this.getClass().getName());
+
+  public ClosestVerbExtractor() {
+    super();
+    this.name = "ClosestVerb";
+    
+  }
+
+  @Override
+  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException
{
+	  List<Feature> features = new ArrayList<Feature>();
+	  
+	  //1 get covering sentence:
+	  Map<EventMention, Collection<Sentence>> coveringMap =
+			  JCasUtil.indexCovering(view, EventMention.class, Sentence.class);
+	  EventMention targetTokenAnnotation = (EventMention)annotation;
+	  Collection<Sentence> sentList = coveringMap.get(targetTokenAnnotation);
+	  
+	  Map<Integer, WordToken> verbDistMap = null;
+	  
+	  //2 get all Verbs within the same sentence as target event lies
+	  if (sentList != null && !sentList.isEmpty()){
+		  for(Sentence sent : sentList) {
+			  verbDistMap = new TreeMap<Integer, WordToken>();
+			  for ( WordToken wt : JCasUtil.selectCovered(view, WordToken.class, sent)) {
+				  if (wt != null){
+					  String pos = wt.getPartOfSpeech();
+					  if (pos.startsWith("VB")){
+						  verbDistMap.put(Math.abs(wt.getBegin() - annotation.getBegin()), wt);
+					  }
+				  }
+			  }
+			  for (Map.Entry<Integer, WordToken> entry : verbDistMap.entrySet()) {
+				  //Feature feature = new Feature(this.name, entry.getValue().getCoveredText());
+				  //		        	  features.add(feature);
+				  //logger.info("found nearby closest verb: "+ entry.getValue().getCoveredText() + "
POS:" + entry.getValue().getPartOfSpeech());
+				  Feature posfeature = new Feature(this.name, entry.getValue().getPartOfSpeech());
+				  features.add(posfeature);
+				  break;		          
+			  }
+		  }
+		  
+	  }
+	  return features;
+  }
+
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ClosestVerbExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java?rev=1466170&r1=1466169&r2=1466170&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/NearbyVerbTenseXExtractor.java
Tue Apr  9 18:23:27 2013
@@ -22,7 +22,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
-import java.util.logging.Logger;
+//import java.util.logging.Logger;
 
 import org.apache.ctakes.typesystem.type.syntax.WordToken;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
@@ -38,7 +38,7 @@ public class NearbyVerbTenseXExtractor i
 
   private String name;
 
-  private Logger logger = Logger.getLogger(this.getClass().getName());
+  //private Logger logger = Logger.getLogger(this.getClass().getName());
 
   public NearbyVerbTenseXExtractor() {
     super();
@@ -70,7 +70,7 @@ public class NearbyVerbTenseXExtractor i
 			  }
 			  Feature feature = new Feature(this.name, verbTP);
 			  features.add(feature);
-			  logger.info("found nearby verb's pos tag: "+ verbTP);
+			  //logger.info("found nearby verb's pos tag: "+ verbTP);
 		  }
 		  
 	  }

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java?rev=1466170&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
Tue Apr  9 18:23:27 2013
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+//import java.util.logging.Logger;
+
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+/** Feature extractor that reports the ID of each segment covering an event mention. */
+public class SectionHeaderExtractor implements SimpleFeatureExtractor {
+
+  /** Name shared by every feature this extractor emits. */
+  private String name;
+
+  public SectionHeaderExtractor() {
+    super();
+    this.name = "SectionHeader";
+  }
+
+  /**
+   * Builds one feature per segment that covers the given event mention.
+   *
+   * @param view the JCas in which event mentions and segments are indexed
+   * @param annotation the target annotation; it is cast to EventMention
+   * @return one feature per covering segment, valued with the segment ID;
+   *         empty when no segment covers the event
+   */
+  @Override
+  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException {
+    Map<EventMention, Collection<Segment>> segmentsByEvent =
+        JCasUtil.indexCovering(view, EventMention.class, Segment.class);
+    Collection<Segment> coveringSegments = segmentsByEvent.get((EventMention) annotation);
+
+    List<Feature> result = new ArrayList<Feature>();
+    if (coveringSegments == null) {
+      return result;
+    }
+    for (Segment segment : coveringSegments) {
+      result.add(new Feature(this.name, segment.getId()));
+    }
+    return result;
+  }
+
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SectionHeaderExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java?rev=1466170&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
Tue Apr  9 18:23:27 2013
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+//import java.util.logging.Logger;
+
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+public class TimeXExtractor implements SimpleFeatureExtractor {
+
+  private String name;
+
+//  private Logger logger = Logger.getLogger(this.getClass().getName());
+
+  public TimeXExtractor() {
+    super();
+    this.name = "TimeXFeature";
+    
+  }
+
+  @Override
+  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException
{
+	  List<Feature> features = new ArrayList<Feature>();
+	  
+	  //1 get covering sentence:
+	  Map<EventMention, Collection<Sentence>> coveringMap =
+			  JCasUtil.indexCovering(view, EventMention.class, Sentence.class);
+	  EventMention targetTokenAnnotation = (EventMention)annotation;
+	  Collection<Sentence> sentList = coveringMap.get(targetTokenAnnotation);
+	  
+	  //2 get TimeX
+	  Map<Integer, TimeMention> timeDistMap = null;
+	  
+	  if (sentList != null && !sentList.isEmpty()){
+		  timeDistMap = new TreeMap<Integer, TimeMention>();
+		  
+		  for(Sentence sent : sentList) {
+			  for (TimeMention time : JCasUtil.selectCovered(view, TimeMention.class, sent)) {
+				  timeDistMap.put(Math.abs(time.getBegin() - annotation.getBegin()), time);
+			  }
+		  }
+		  
+		  //get the closest Time Expression feature
+		  for (Map.Entry<Integer, TimeMention> entry : timeDistMap.entrySet()) {
+			  Feature feature = new Feature(this.name, entry.getValue().getCoveredText());
+			  features.add(feature);
+//			  logger.info("add time feature: "+ entry.getValue().getCoveredText() + entry.getValue().getTimeClass());
+			  Feature indicator = new Feature("TimeXNearby", this.name);
+			  features.add(indicator);
+			  break;
+		  }
+	  }
+	  
+
+	  return features;
+  }
+
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java?rev=1466170&r1=1466169&r2=1466170&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
Tue Apr  9 18:23:27 2013
@@ -33,6 +33,7 @@ import java.util.logging.Logger;
 import org.apache.ctakes.temporal.ae.DocTimeRelAnnotator;
 import org.apache.ctakes.typesystem.type.refsem.EventProperties;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.Feature;
@@ -40,6 +41,7 @@ import org.apache.uima.collection.Collec
 import org.apache.uima.jcas.JCas;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+//import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.factory.AggregateBuilder;
@@ -97,11 +99,12 @@ public class EvaluationOfEventProperties
   protected void train(CollectionReader collectionReader, File directory) throws Exception
{
     AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
     aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
+    aggregateBuilder.add(CopyFromGold.getDescription(TimeMention.class));
     aggregateBuilder.add(DocTimeRelAnnotator.createDataWriterDescription(
-        LIBSVMStringOutcomeDataWriter.class,
+    	LIBSVMStringOutcomeDataWriter.class,
         directory));
     SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
-    JarClassifierBuilder.trainAndPackage(directory, "-c", "1000");
+    JarClassifierBuilder.trainAndPackage(directory, "-c", "10000");
   }
 
   @Override
@@ -110,6 +113,7 @@ public class EvaluationOfEventProperties
       File directory) throws Exception {
     AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
     aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
+    aggregateBuilder.add(CopyFromGold.getDescription(TimeMention.class));
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearEventProperties.class));
     aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription(directory));
 



Mime
View raw message