ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1746738 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
Date Fri, 03 Jun 2016 16:44:01 GMT
Author: dligach
Date: Fri Jun  3 16:44:01 2016
New Revision: 1746738

URL: http://svn.apache.org/viewvc?rev=1746738&view=rev
Log:
rewrote the code to get tokens between args; now can specify the size of context on both sides of arg1/arg2

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java?rev=1746738&r1=1746737&r2=1746738&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java Fri Jun  3 16:44:01 2016
@@ -164,7 +164,8 @@ public class PositiveAndNegativeExampleP
               label = relation.getCategory().toLowerCase();
             }
 
-            String context = getSentenceWithMarkedArgs(systemView, sentence, mention1, mention2);

+            // String context = getSentenceWithMarkedArgs(systemView, sentence, mention1, mention2);
+            String context = getTokensBetween(systemView, mention1, mention2, 0);
             String text = String.format("%s|%s", label, context);
             eventEventRelationsInSentence.add(text.toLowerCase());
           }
@@ -186,13 +187,13 @@ public class PositiveAndNegativeExampleP
       // for a few hundred examples, begin/end offsets don't both match token begin/end
       // so using an 'or' rather than 'and'
       if(baseToken.getBegin() == arg1.getBegin() || (baseToken.getEnd() == arg1.getEnd()))
{
-        tokens.add("<e1>");
+        tokens.add("<e>");
         tokens.add(baseToken.getCoveredText());
-        tokens.add("</e1>");
+        tokens.add("</e>");
       } else if(baseToken.getBegin() == arg2.getBegin() || (baseToken.getEnd() == arg2.getEnd()))
{
-        tokens.add("<e2>");
+        tokens.add("<e>");
         tokens.add(baseToken.getCoveredText());
-        tokens.add("</e2>");
+        tokens.add("</e>");
       } else {
         tokens.add(baseToken.getCoveredText());
       }
@@ -201,25 +202,25 @@ public class PositiveAndNegativeExampleP
     return String.join(" ", tokens).replaceAll("[\r\n]", " ");
   }
   
-  public static String getTokensBetween(JCas jCas, Annotation arg1, Annotation arg2)  {
+  /**
+   * Return tokens between arg1 and arg2 as string 
+   * @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
+   */
+  public static String getTokensBetween(JCas jCas, Annotation arg1, Annotation arg2, int
contextSize) {
     
-    // todo: use 2 tokens for things like 'during those hospitalizations ...'
-    // todo: ensure previous tokens are in the same sentence
-    StringBuilder tokens = new StringBuilder();
-    List<BaseToken> preceeding = JCasUtil.selectPreceding(jCas, BaseToken.class, arg1,
1);
-    tokens.append(preceeding.get(0).getCoveredText() + " ");
-    
-    List<BaseToken> baseTokens = JCasUtil.selectBetween(jCas, BaseToken.class, arg1,
arg2);
-    tokens.append("" + arg1.getCoveredText() + " ");
-    for(BaseToken baseToken : baseTokens) {
-      tokens.append(baseToken.getCoveredText() + " ");
+    List<String> tokens = new ArrayList<>();
+    for(BaseToken baseToken :  JCasUtil.selectPreceding(jCas, BaseToken.class, arg1, contextSize))
{
+      tokens.add(baseToken.getCoveredText());
+    }
+    tokens.add(arg1.getCoveredText());
+    for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, arg1, arg2))
{
+      tokens.add(baseToken.getCoveredText());
+    }
+    tokens.add(arg2.getCoveredText());
+    for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, arg2, contextSize))
{
+      tokens.add(baseToken.getCoveredText());
     }
-    tokens.append("" + arg2.getCoveredText() + " ");
-
-    List<BaseToken> following = JCasUtil.selectFollowing(jCas, BaseToken.class, arg1,
1);
-    tokens.append(following.get(0).getCoveredText());
-
     
-    return tokens.toString().replaceAll("[\r\n]", " ");
+    return String.join(" ", tokens).replaceAll("[\r\n]", " ");
   }
 }



Mime
View raw message