ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1711490 - /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
Date Fri, 30 Oct 2015 15:03:49 GMT
Author: dligach
Date: Fri Oct 30 15:03:49 2015
New Revision: 1711490

URL: http://svn.apache.org/viewvc?rev=1711490&view=rev
Log:
simplified code by getting rid of 'reject' state

Added:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
      - copied, changed from r1711353, ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java

Copied: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
(from r1711353, ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java?p2=ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java&p1=ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java&r1=1711353&r2=1711490&rev=1711490&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java
(original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
Fri Oct 30 15:03:49 2015
@@ -43,10 +43,10 @@ import com.lexicalscope.jewel.cli.CliFac
 import com.lexicalscope.jewel.cli.Option;
 
 /**
- * 
+ * Pipeline for detecting very simple lists of anatomical sites.
  * @author dmitriy dligach
  */
-public class ListAndConjunctionAnnotatorPipeline {
+public class AnatomicalSiteListExtractorPipeline {
   
   static interface Options {
 
@@ -65,13 +65,18 @@ public class ListAndConjunctionAnnotator
 	}
 
   /**
-   * Detect simple lists and conjunctions.
+   * Implements a finite state machine for detecting 
+   * extremely simple lists and conjunctions of anatomical sites.
    * E.g. CT chest, abdomen and pelvis.
    *  
    * @author dmitriy dligach
    */
   public static class ListAndConjunctionAe extends JCasAnnotator_ImplBase {
-    
+
+    public enum State {
+      START, ANATSITE1, PUNCTUATION, ANATSITE2, ACCEPT
+    }
+
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
       
@@ -83,60 +88,56 @@ public class ListAndConjunctionAnnotator
       }
       
       for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) {
-        
-        String currentState = "start";
-        String list = "";
-        
-        for(BaseToken token : JCasUtil.selectCovered(systemView, BaseToken.class, sentence)) {
-          String nextState = getNextState(systemView, currentState, token); 
-          if(nextState == "accept") {
-            System.out.println("found list in: " + sentence.getCoveredText());
-            break;
-          } else if(nextState != "reject") {
-            currentState = nextState;
-          } else {
-            currentState = "start";
-          }
-        }
-        
-        if(list != "") {
-          System.out.println(sentence.getCoveredText() + "/" + list);
+        State state = State.START;
+        for(BaseToken input : JCasUtil.selectCovered(systemView, BaseToken.class, sentence)) {
+          state = getNextState(systemView, state, input); 
+          if(state == State.ACCEPT) {
+            System.out.println("found a list in: " + sentence.getCoveredText());
+            state = State.START;
+          } 
         }
       }
     }
 
-    public String getNextState(JCas systemView, String currentState, BaseToken nextToken) {
-      
-      Set<String> listConnectors = new HashSet<>(Lists.newArrayList("and", ","));
-      
-      String nextState = "reject";
-      int nextTokenSemType = getSemanticType(systemView, nextToken);
-      String nextTokenText = nextToken.getCoveredText().toLowerCase();
+    /*
+     * Compute the transition given current state and an input token.
+     */
+    public State getNextState(JCas systemView, State currentState, BaseToken inputToken) {
+
+      // tokens that connect list elements
+      Set<String> listConnectors = new HashSet<>(Lists.newArrayList("and", "or", ","));
       
-      if(currentState == "start") {
-        if(nextTokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
-          nextState = "as1";
+      State nextState;
+      int tokenSemType = getSemanticType(systemView, inputToken);
+      String tokenText = inputToken.getCoveredText().toLowerCase();
+      
+      if(currentState == State.START) {
+        if(tokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
+          nextState = State.ANATSITE1;
         } else {
-          nextState = "reject";
+          nextState = State.START;
         }
-      } else if(currentState == "as1") {
-        if(listConnectors.contains(nextTokenText)) {
-          nextState = "punct";
+      } else if(currentState == State.ANATSITE1) {
+        if(listConnectors.contains(tokenText)) {
+          nextState = State.PUNCTUATION;
         } else {
-          nextState = "reject";
+          nextState = State.START;
         }
-      } else if(currentState == "punct") {
-        if(nextTokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
-          nextState = "as2";
+      } else if(currentState == State.PUNCTUATION) {
+        if(tokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
+          nextState = State.ANATSITE2;
         } else {
-          nextState = "reject";
+          nextState = State.START;
         } 
-      } else if(currentState == "as2") {
-        if(listConnectors.contains(nextTokenText)) {
-          nextState = "punct";
+      } else if(currentState == State.ANATSITE2) {
+        if(listConnectors.contains(tokenText)) {
+          nextState = State.PUNCTUATION;
         } else {
-          nextState = "accept";
+          nextState = State.ACCEPT;
         }
+      } else {
+        System.out.println("\nThis shouldn't happen!\n");
+        nextState = State.START;
       }
         
       return nextState;
@@ -145,12 +146,15 @@ public class ListAndConjunctionAnnotator
     public int getSemanticType(JCas systemView, BaseToken baseToken) {
       
       List<IdentifiedAnnotation> coveredIdentifiedAnnotations = 
-          JCasUtil.selectCovered(systemView, IdentifiedAnnotation.class, baseToken.getBegin(), baseToken.getEnd());
-      
-      if(coveredIdentifiedAnnotations.size() < 1) {
-        return 0; // no type
+          JCasUtil.selectCovered(
+              systemView, 
+              IdentifiedAnnotation.class, 
+              baseToken.getBegin(), 
+              baseToken.getEnd());
+            if(coveredIdentifiedAnnotations.size() < 1) {
+        return CONST.NE_TYPE_ID_UNKNOWN; // no type
       } 
-      
+            
       return coveredIdentifiedAnnotations.get(0).getTypeID();
     }
   }



Mime
View raw message