ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1493217 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae: TimeAnnotator.java feature/ParseSpanFeatureExtractor.java
Date Fri, 14 Jun 2013 19:12:47 GMT
Author: tmill
Date: Fri Jun 14 19:12:47 2013
New Revision: 1493217

URL: http://svn.apache.org/r1493217
Log:
Added parse feature extractor to regular time annotator.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java
  (with props)
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1493217&r1=1493216&r2=1493217&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
Fri Jun 14 19:12:47 2013
@@ -22,6 +22,7 @@ import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.ctakes.temporal.ae.feature.ParseSpanFeatureExtractor;
 import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
@@ -80,7 +81,10 @@ public class TimeAnnotator extends Tempo
   protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
 
   protected List<CleartkExtractor> contextFeatureExtractors;
-
+  
+//  protected List<SimpleFeatureExtractor> parseFeatureExtractors;
+  protected ParseSpanFeatureExtractor parseExtractor;
+  
   private BIOChunking<BaseToken, TimeMention> timeChunking;
 
   @Override
@@ -97,6 +101,9 @@ public class TimeAnnotator extends Tempo
         new TypePathExtractor(BaseToken.class, "partOfSpeech"),
         new TimeWordTypeExtractor());
 
+//    CombinedExtractor parseExtractors = new CombinedExtractor(
+//        new ParseSpanFeatureExtractor()
+//        );
     this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
     this.tokenFeatureExtractors.add(allExtractors);
 
@@ -106,6 +113,9 @@ public class TimeAnnotator extends Tempo
         allExtractors,
         new Preceding(3),
         new Following(3)));
+//    this.parseFeatureExtractors = new ArrayList<ParseSpanFeatureExtractor>();
+//    this.parseFeatureExtractors.add(new ParseSpanFeatureExtractor());
+    parseExtractor = new ParseSpanFeatureExtractor();
   }
 
   @Override
@@ -146,6 +156,18 @@ public class TimeAnnotator extends Tempo
           String previousOutcome = index < 0 ? "O" : outcomes.get(index);
           features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
         }
+        // features from dominating parse tree
+//        for(SimpleFeatureExtractor extractor : this.parseFeatureExtractors){
+        BaseToken startToken = token;
+        for(int i = tokenIndex-1; i >= 0; --i){
+          String outcome = outcomes.get(i);
+          if(outcome.equals("O")){
+            break;
+          }
+          startToken = tokens.get(i);
+        }
+        features.addAll(parseExtractor.extract(jCas, startToken.getBegin(), token.getEnd()));
+//        }
         // if training, write to data file
         if (this.isTraining()) {
           String outcome = outcomes.get(tokenIndex);

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java?rev=1493217&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java
Fri Jun 14 19:12:47 2013
@@ -0,0 +1,62 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+/** Builds classifier features from the constituency-parse node returned for a character span. */
+public class ParseSpanFeatureExtractor  {
+  /**
+   * Describes the node that AnnotationTreeUtils.annotationNode returns for [begin, end].
+   * @param jcas CAS handed to the tree lookup
+   * @param begin begin offset of the span
+   * @param end end offset of the span
+   * @return the extracted features; empty when the lookup returns null
+   */
+  public List<Feature> extract(JCas jcas, int begin, int end) {
+    List<Feature> feats = new ArrayList<Feature>();
+    TreebankNode domNode = AnnotationTreeUtils.annotationNode(jcas, begin, end);
+    if(domNode == null){
+      return feats;
+    }
+    feats.add(new Feature("DominatingTreeCat", domNode.getNodeType()));
+    if(domNode.getNodeTags() != null){
+      // every node tag is emitted, not only TMP, despite the feature name
+      for(int ind = 0; ind < domNode.getNodeTags().size(); ind++){
+        feats.add(new Feature("DominatingTmpTag", domNode.getNodeTags(ind)));
+      }
+    }
+    TreebankNode parent = domNode.getParent();
+    if(parent != null){
+      // report the parent's category; the node's own category is already DominatingTreeCat
+      feats.add(new Feature("DominatingTreeParent", parent.getNodeType()));
+    }
+    // walk the ancestors until getParent() returns null, collecting their tags
+    for(TreebankNode anc = parent; anc != null; anc = anc.getParent()){
+      if(anc.getNodeTags() != null){
+        for(int ind = 0; ind < anc.getNodeTags().size(); ind++){
+          feats.add(new Feature("DominatingAncestorTmpTag", anc.getNodeTags(ind)));
+        }
+      }
+    }
+    if(domNode.getLeaf()){
+      feats.add(new Feature("DominatingIsLeaf"));
+    }else{
+      StringBuilder production = new StringBuilder();
+      for(int i = 0; i < domNode.getChildren().size(); i++){
+        String childType = domNode.getChildren(i).getNodeType();
+        production.append(childType).append("_");
+        feats.add(new Feature("DominatingChildBag" + childType));
+      }
+      feats.add(new Feature("DominatingProduction", production.toString()));
+    }
+    if(domNode.getBegin() == begin && domNode.getEnd() == end){
+      feats.add(new Feature("DominatingExactMatch"));
+    }
+    return feats;
+  }
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message