incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stevenbeth...@apache.org
Subject svn commit: r1443859 - in /incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/EventAnnotator.java ae/TemporalEntityAnnotator_ImplBase.java ae/TimeAnnotator.java eval/Evaluation_ImplBase.java
Date Fri, 08 Feb 2013 07:37:02 GMT
Author: stevenbethard
Date: Fri Feb  8 07:37:01 2013
New Revision: 1443859

URL: http://svn.apache.org/r1443859
Log:
Skips sections that are not annotated for events or times.

Added:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalEntityAnnotator_ImplBase.java
  (with props)
Modified:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1443859&r1=1443858&r2=1443859&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
Fri Feb  8 07:37:01 2013
@@ -36,6 +36,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -66,7 +67,7 @@ import com.google.common.base.Predicate;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 
-public class EventAnnotator extends CleartkAnnotator<String> {
+public class EventAnnotator extends TemporalEntityAnnotator_ImplBase {
 
   public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE = "ProbabilityOfKeepingANegativeExample";
 
@@ -198,7 +199,7 @@ public class EventAnnotator extends Clea
   }
 
   @Override
-  public void process(JCas jCas) throws AnalysisEngineProcessException {
+  public void process(JCas jCas, Segment segment) throws AnalysisEngineProcessException {
     PredicateArgumentExtractor predicateArgumentExtractor = new PredicateArgumentExtractor(jCas);
 
     Random rand = new Random();
@@ -207,7 +208,7 @@ public class EventAnnotator extends Clea
     SMOTEplus smote = new SMOTEplus((int)Math.ceil(this.smoteNumOfNeighbors));
         
     // classify tokens within each sentence
-    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+    for (Sentence sentence : JCasUtil.selectCovered(jCas, Sentence.class, segment)) {
       List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
 
       // during training, the list of all outcomes for the tokens

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalEntityAnnotator_ImplBase.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalEntityAnnotator_ImplBase.java?rev=1443859&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalEntityAnnotator_ImplBase.java
(added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalEntityAnnotator_ImplBase.java
Fri Feb  8 07:37:01 2013
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.util.Set;
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Sets;
+
+public abstract class TemporalEntityAnnotator_ImplBase extends CleartkAnnotator<String>
{
+
+  private static final Set<String> SEGMENTS_TO_SKIP = Sets.newHashSet("20104", "20105",
"20116", "20138");
+
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+      if (!SEGMENTS_TO_SKIP.contains(segment.getId())) {
+        this.process(jCas, segment);
+      }
+    }
+  }
+  
+  public abstract void process(JCas jCas, Segment segment) throws AnalysisEngineProcessException;
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalEntityAnnotator_ImplBase.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalEntityAnnotator_ImplBase.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1443859&r1=1443858&r2=1443859&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
Fri Feb  8 07:37:01 2013
@@ -24,6 +24,7 @@ import java.util.List;
 
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -50,7 +51,7 @@ import org.cleartk.classifier.jar.Generi
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.util.JCasUtil;
 
-public class TimeAnnotator extends CleartkAnnotator<String> {
+public class TimeAnnotator extends TemporalEntityAnnotator_ImplBase {
 
   public static AnalysisEngineDescription createDataWriterDescription(
       Class<? extends DataWriter<String>> dataWriterClass,
@@ -105,9 +106,9 @@ public class TimeAnnotator extends Clear
   }
 
   @Override
-  public void process(JCas jCas) throws AnalysisEngineProcessException {
+  public void process(JCas jCas, Segment segment) throws AnalysisEngineProcessException {
     // classify tokens within each sentence
-    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+    for (Sentence sentence : JCasUtil.selectCovered(jCas, Sentence.class, segment)) {
       List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
 
       // during training, the list of all outcomes for the tokens

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1443859&r1=1443858&r2=1443859&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Fri Feb  8 07:37:01 2013
@@ -22,6 +22,8 @@ import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.ctakes.chunker.ae.Chunker;
 import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
@@ -29,7 +31,6 @@ import org.apache.ctakes.chunker.ae.adju
 import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
 import org.apache.ctakes.core.ae.OverlapAnnotator;
 import org.apache.ctakes.core.ae.SentenceDetector;
-import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.resource.FileResourceImpl;
 import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
@@ -45,6 +46,7 @@ import org.apache.ctakes.temporal.ae.THY
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CAS;
@@ -162,7 +164,7 @@ public abstract class Evaluation_ImplBas
         break;
     }
     // identify segments
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
     // identify sentences
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
         SentenceDetector.class,
@@ -366,4 +368,21 @@ public abstract class Evaluation_ImplBas
       }
     }
   }
+  
+  // replace this with SimpleSegmentWithTagsAnnotator if that code ever gets fixed
+  public static class SegmentsFromBracketedSectionTagsAnnotator extends JCasAnnotator_ImplBase
{
+    private static Pattern SECTION_PATTERN = Pattern.compile("(\\[start section id=\"?(.*?)\"?\\]).*?(\\[end
section id=\"?(.*?)\"?\\])", Pattern.DOTALL);
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      Matcher matcher = SECTION_PATTERN.matcher(jCas.getDocumentText());
+      while (matcher.find()) {
+        Segment segment = new Segment(jCas);
+        segment.setBegin(matcher.start() + matcher.group(1).length());
+        segment.setEnd(matcher.end() - matcher.group(3).length());
+        segment.setId(matcher.group(2));
+        segment.addToIndexes();
+      }
+    }
+  }
 }



Mime
View raw message