ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1701295 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
Date Fri, 04 Sep 2015 17:12:18 GMT
Author: dligach
Date: Fri Sep  4 17:12:17 2015
New Revision: 1701295

URL: http://svn.apache.org/r1701295
Log:
printing local context instead of entire sentence

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java?rev=1701295&r1=1701294&r2=1701295&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java Fri Sep  4 17:12:17 2015
@@ -41,8 +41,8 @@ public class SearchUtility {
     for(ScoreDoc scoreDoc : scoreDocs) {
       Document document = indexSearcher.doc(scoreDoc.doc);
       String text = document.get(fieldName);
-      System.out.println(text);
-      System.out.println();
+      String context = getContext(queryText, text, 30);
+      System.out.println(context);
     }
 
     directory.close();
@@ -50,5 +50,29 @@ public class SearchUtility {
 
     System.out.println("total hits: " + scoreDocs.length);
   }
+  
+  /**
+   * Get context for a string. Return "" if string not found in text.
+   * 
+   * TODO: Occasionally no context is found when the indexer removed certain
+   * characters which still exist in the source text. E.g. when "... pain, and swelling"
+   * is in the source document, the query "pain and swelling" will return this document.
+   * However, this method will not find the occurrence of "pain and swelling" in the
+   * document because of the comma.
+   */
+  public static String getContext(String string, String text, int characterWindow) {
+    
+    String noEOL = text.replace('\n', ' ');
+    int begin = noEOL.indexOf(string);
+    if(begin == -1) {
+      return "";
+    }
+    
+    int end = begin + string.length();
+    int contextBegin = Math.max(0, begin - characterWindow);
+    int contextEnd = Math.min(text.length(), end + characterWindow);
+    
+    return noEOL.substring(contextBegin, contextEnd);
+  }
 }
 



Mime
View raw message