ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1696466 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
Date Tue, 18 Aug 2015 17:38:42 GMT
Author: dligach
Date: Tue Aug 18 17:38:42 2015
New Revision: 1696466

URL: http://svn.apache.org/r1696466
Log:
printing entire sentences now instead of a few words on the sides of the match

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java?rev=1696466&r1=1696465&r2=1696466&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java Tue Aug 18 17:38:42 2015
@@ -18,9 +18,9 @@ public class SearchUtility {
   
   public static void main(String[] args) throws IOException {
 
-    final int maxHits = 250;
+    final int maxHits = 100;
     final String searchField = "content";
-    final String indexLocation = "/Users/Dima/Boston/Data/Mimic/Index/";
+    final String indexLocation = "/Users/dima/Boston/Data/DeepPhe/Index/";
 
     String queryText = JOptionPane.showInputDialog("Enter query");
     
@@ -39,36 +39,11 @@ public class SearchUtility {
     for(ScoreDoc scoreDoc : scoreDocs) {
       Document document = indexSearcher.doc(scoreDoc.doc);
       String text = document.get(searchField).toLowerCase().replace('\n', ' ');
-      String context = getContext(queryText, text, 20);
-      System.out.println(context);
+      System.out.println(text);
     }
     
     // indexSearcher.close();
     System.out.println("total hits: " + scoreDocs.length);
   }
-  
-  /**
-   * Get context for a string. Return "" if string not found in text.
-   * 
-   * TODO: Occasionally no context is found when the indexer removed certain
-   * characters which still exist in the source text. E.g. when "... pain, and swelling"
-   * is in the source document, the query "pain and swelling" will return this document.
-   * However, this method will not find the occurence of "pain and swelling" in the
-   * document because of the comma.
-   */
-  public static String getContext(String string, String text, int characterWindow) {
-    
-    String noEOL = text.replace('\n', ' ');
-    int begin = noEOL.indexOf(string);
-    if(begin == -1) {
-      return "";
-    }
-    
-    int end = begin + string.length();
-    int contextBegin = Math.max(0, begin - characterWindow);
-    int contextEnd = Math.min(text.length(), end + characterWindow);
-    
-    return noEOL.substring(contextBegin, contextEnd);
-  }
 }
 



Mime
View raw message