ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1493806 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/CRFTimeAnnotator.java eval/EvaluationOfTimeSpans.java
Date Mon, 17 Jun 2013 15:20:10 GMT
Author: tmill
Date: Mon Jun 17 15:20:10 2013
New Revision: 1493806

URL: http://svn.apache.org/r1493806
Log:
Improvements to CRF-based time annotator.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java?rev=1493806&r1=1493805&r2=1493806&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java Mon Jun 17 15:20:10 2013
@@ -4,9 +4,11 @@ import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
 import org.apache.ctakes.temporal.ae.feature.ParseSpanFeatureExtractor;
 import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -18,7 +20,6 @@ import org.apache.uima.resource.Resource
 import org.cleartk.classifier.CleartkAnnotator;
 import org.cleartk.classifier.DataWriter;
 import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.Instance;
 import org.cleartk.classifier.Instances;
 import org.cleartk.classifier.chunking.BIOChunking;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor;
@@ -77,8 +78,8 @@ public class CRFTimeAnnotator extends Te
     this.timeChunking = new BIOChunking<BaseToken, TimeMention>(BaseToken.class, TimeMention.class);
     CombinedExtractor allExtractors = new CombinedExtractor(
         new CoveredTextExtractor(),
-        new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED),
-        new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+//        new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED),
+//        new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
         new TypePathExtractor(BaseToken.class, "partOfSpeech"),
         new TimeWordTypeExtractor());
 
@@ -92,8 +93,8 @@ public class CRFTimeAnnotator extends Te
     this.contextFeatureExtractors.add(new CleartkExtractor(
         BaseToken.class,
         allExtractors,
-        new Preceding(3),
-        new Following(3)));
+        new Preceding(2),
+        new Following(2)));
 //    this.parseFeatureExtractors = new ArrayList<ParseSpanFeatureExtractor>();
 //    this.parseFeatureExtractors.add(new ParseSpanFeatureExtractor());
     parseExtractor = new ParseSpanFeatureExtractor();
@@ -152,7 +153,11 @@ public class CRFTimeAnnotator extends Te
 //          }
 //          startToken = tokens.get(i);
 //        }
-//        features.addAll(parseExtractor.extract(jCas, startToken.getBegin(), token.getEnd()));
+        TreebankNode preTerm = AnnotationTreeUtils.annotationNode(jCas, token);
+        features.addAll(parseExtractor.extract(jCas, token.getBegin(), token.getEnd()));
+        //if(preTerm != null && preTerm.getParent() != null){
+        //  features.addAll(parseExtractor.extract(jCas, preTerm.getParent().getBegin(), preTerm.getParent().getEnd()));
+        //}
         
         // if training, write to data file
 //        if (this.isTraining()) {

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1493806&r1=1493805&r2=1493806&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Mon Jun 17 15:20:10 2013
@@ -68,7 +68,7 @@ public class EvaluationOfTimeSpans exten
     Map<Class<? extends JCasAnnotator_ImplBase>, String[]> annotatorTrainingArguments = Maps.newHashMap();
     annotatorTrainingArguments.put(TimeAnnotator.class, new String[]{"-c", "0.1"});
     annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[]{"-c", "0.1"});
-    annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{});
+    annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{"-p", "c2=0.1"});
     
     // run one evaluation per annotator class
     final Map<Class<?>, AnnotationStatistics<?>> annotatorStats = Maps.newHashMap();



Mime
View raw message