ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1696618 - in /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes: consumers/SentencePrinter.java index/IndexSentences.java
Date Wed, 19 Aug 2015 15:53:35 GMT
Author: dligach
Date: Wed Aug 19 15:53:35 2015
New Revision: 1696618

URL: http://svn.apache.org/r1696618
Log:
removing line breaks from sentences before indexing

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java?rev=1696618&r1=1696617&r2=1696618&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java Wed Aug 19 15:53:35 2015
@@ -68,7 +68,9 @@ public class SentencePrinter {
     public void process(JCas jCas) throws AnalysisEngineProcessException {
       
       for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
-        System.out.println("* " + sentence.getCoveredText());
+        String withLineBreaks = sentence.getCoveredText();
+        String noLineBreaks = withLineBreaks.replace("\n", "");
+        System.out.println(" * " + noLineBreaks);
         System.out.println();
       }
     }

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java?rev=1696618&r1=1696617&r2=1696618&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java Wed Aug 19 15:53:35 2015
@@ -106,7 +106,9 @@ public class IndexSentences {
       
       for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
         Document document = new Document();
-        document.add(new Field("content", sentence.getCoveredText(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
+        String withLineBreaks = sentence.getCoveredText();
+        String noLineBreaks = withLineBreaks.replace("\n", "");
+        document.add(new Field("content", noLineBreaks, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
         try {
           indexWriter.addDocument(document);
         } catch (IOException e) {



Mime
View raw message